Setup
load sero-epi functions
source("seroepi_functions.R")
source("https://raw.githubusercontent.com/klebgenomics/KleborateR/main/kleborate_functions.R")
## Loading required package: fs
## Loading required package: glue
set colour palettes
# source: https://davidmathlogic.com/colorblind/#%23D81B60-%231E88E5-%23FFC107-%23004D40
region_cols <- c(`Southern Asia` ="#D81B60", `Western Africa`="#1E88E5", `Eastern Africa`="#FFC107", `Southern Africa`="#81B1A9", Global="black")
group_cols <- c(All="black", Fatal ="#D81B60", ESBL="#1E88E5", CP="#FFC107")
load data on included samples - Table S3
data_NNS_sites10 <- read_tsv("tables/TableS3.tsv")
## Rows: 1877 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (14): Genome Name, Study, Region, Country, Cluster, ST, K_locus, K_type,...
## dbl (7): Site, Year, Mortality, N50, contig_count, total_size, resistance_s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
K locus analysis
read posterior estimates for adjusted counts (main) and raw counts
(for comparison)
# read adjusted Bayesian estimates for K
kbayes <- parseModelledEstimates(global_post="outputs_core/K_Full_min10_adj_28_posterior_global.csv.gz", region_post="outputs_core/K_Full_min10_adj_28_posterior_subgroup.csv.gz")
## Rows: 1080000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4320000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
# read raw Bayesian estimates for K
kbayes_raw <- parseModelledEstimates(global_post="outputs_core/K_Full_min10_raw_28_posterior_global.csv.gz", region_post="outputs_core/K_Full_min10_raw_28_posterior_subgroup.csv.gz")
## Rows: 1080000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4320000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
global and regional prevalence - top20 K
k_prev_global_dist <- locus_ridgesplot(posterior=kbayes$global_post,
ranks=kbayes$locus_rank,
lines_every=10,
xlim=c(0,12), xbreaks=seq(0,12,1), scale=1.5,
title = "a) Global estimates")
## Joining with `by = join_by(locus)`
## Picking joint bandwidth of 0.0729
## Warning: Removed 50 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

k_prev_region_heatmap <- region_heatmap(kbayes$region_est, kbayes$global_est,
ranks=kbayes$locus_rank, median=F, title="b) Point estimates")
## Warning: Removed 300 rows containing missing values or values outside the scale range
## (`geom_tile()`).
## Warning: Removed 300 rows containing missing values or values outside the scale range
## (`geom_text()`).

fig 2
k_prev_global_dist + k_prev_region_heatmap
## Picking joint bandwidth of 0.0729
## Warning: Removed 50 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Warning: Removed 300 rows containing missing values or values outside the scale range
## (`geom_tile()`).
## Warning: Removed 300 rows containing missing values or values outside the scale range
## (`geom_text()`).

ggsave("figures/Fig2_K_global_regional_Kadj.pdf", width=8, height=6)
## Picking joint bandwidth of 0.0729
## Warning: Removed 50 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Warning: Removed 300 rows containing missing values or values outside the scale range
## (`geom_tile()`).
## Warning: Removed 300 rows containing missing values or values outside the scale range
## (`geom_text()`).
ggsave("figures/Fig2_K_global_regional_Kadj.png", width=8, height=6)
## Picking joint bandwidth of 0.0729
## Warning: Removed 50 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Warning: Removed 300 rows containing missing values or values outside the scale range
## (`geom_tile()`).
## Warning: Removed 300 rows containing missing values or values outside the scale range
## (`geom_text()`).
global and regional prevalence - all K
k_prev_global_dist_all <- locus_ridgesplot(posterior=kbayes$global_post,
ranks=kbayes$locus_rank,
maxRank = 90, axis_font_size=8,
lines_every=10,
xlim=c(0,12), xbreaks=seq(0,12,1), scale=3, title="a) Global estimates")
## Joining with `by = join_by(locus)`
## Picking joint bandwidth of 0.0404
## Warning: Removed 50 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

k_prev_region_heatmap_all <- region_heatmap(kbayes$region_est, kbayes$global_est,
ranks=kbayes$locus_rank,
maxRank = 90, axis_font_size=8, median=F, title="b) Point estimates")

logistic regression of K vs year + region
result <- c()
for (k in kbayes$locus_rank$locus[1:30]) {
d <- data_NNS_sites10 %>%
mutate(k=if_else(K_locus==k, 1, 0)) %>%
select(k, Year, Region)
m <- summary(logistf(k~Year+Region, data=d))
result <- rbind(result, cbind(coef=m$coefficients, p=m$prob, locus=k))
}
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -29.08650611 65.03194079 -156.50119345 99.45899194
## Year 0.01367681 0.03223093 -0.05003595 0.07682232
## RegionSouthern Africa -1.07492377 0.26894738 -1.63379372 -0.57292847
## RegionSouthern Asia -2.82549482 0.40638039 -3.72886712 -2.10776850
## RegionWestern Africa -1.29204824 0.49182954 -2.41248429 -0.43703467
## Chisq p method
## (Intercept) 0.1983579 6.560496e-01 2
## Year 0.1785592 6.726144e-01 2
## RegionSouthern Africa 19.5146241 9.983246e-06 2
## RegionSouthern Asia Inf 0.000000e+00 2
## RegionWestern Africa 9.9111038 1.642846e-03 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=125.8823 on 4 df, p=0, n=1877
## Wald test = 616.8107 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia, RegionWestern Africa exceeded. Try
## to increase the number of iterations by passing 'logistpl.control(maxit=...)'
## to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 130.00000000 43.22353251 -370.0000000 630.0000000
## Year -0.06458702 0.02142338 -0.3125582 0.1829517
## RegionSouthern Africa -0.01023999 0.15334746 -10.5650041 2.1760917
## RegionSouthern Asia -0.12224666 0.11896939 -10.3843446 1.4477985
## RegionWestern Africa -0.11487808 0.24194477 -25.0351582 4.6747241
## Chisq p method
## (Intercept) 0.000000 1.000000e+00 2
## Year 0.000000 1.000000e+00 2
## RegionSouthern Africa 2.549452 1.103325e-01 2
## RegionSouthern Asia 44.206672 2.954725e-11 2
## RegionWestern Africa 3.522880 6.052718e-02 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-345.1287 on 4 df, p=1, n=1877
## Wald test = 83.59206 on 4 df, p = 0logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -72.35455265 101.63799482 -272.95985795 130.0138677
## Year 0.03435712 0.05037043 -0.06594629 0.1337621
## RegionSouthern Africa 1.61971153 0.23776879 1.15508425 2.0919412
## RegionSouthern Asia -0.83100114 0.35498796 -1.58030718 -0.1700594
## RegionWestern Africa -0.89610551 0.83527176 -3.08481192 0.4346505
## Chisq p method
## (Intercept) 0.4952034 0.4816153 2
## Year 0.4546540 0.5001331 2
## RegionSouthern Africa Inf 0.0000000 2
## RegionSouthern Asia 6.2290525 0.0125671 2
## RegionWestern Africa 1.5142666 0.2184892 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=81.90503 on 4 df, p=1.110223e-16, n=1877
## Wald test = 678.5959 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -121.52670942 73.43279592 -267.12400641 22.3627828
## Year 0.05879294 0.03639016 -0.01251884 0.1309384
## RegionSouthern Africa -2.28668794 0.81621578 -4.47188270 -0.9692806
## RegionSouthern Asia 1.64898827 0.18342303 1.29331880 2.0146749
## RegionWestern Africa -0.52044139 0.65649154 -2.10968342 0.5785972
## Chisq p method
## (Intercept) 2.7368651 9.805696e-02 2
## Year 2.6075768 1.063542e-01 2
## RegionSouthern Africa 15.9920760 6.360816e-05 2
## RegionSouthern Asia Inf 0.000000e+00 2
## RegionWestern Africa 0.7277568 3.936113e-01 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=158.4881 on 4 df, p=0, n=1877
## Wald test = 632.3645 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 84.31953547 -630.0000000 370.000000
## Year 0.06304681 0.04178511 -0.1851209 0.310525
## RegionSouthern Africa 0.31769241 0.27165518 -1.8369696 2.554567
## RegionSouthern Asia 0.13270029 0.22680909 -1.7342323 2.090457
## RegionWestern Africa 0.39596654 0.42209128 -4.3975868 3.245775
## Chisq p method
## (Intercept) 0.000000 1.000000e+00 2
## Year 0.000000 1.000000e+00 2
## RegionSouthern Africa 27.188599 1.845438e-07 2
## RegionSouthern Asia 14.625671 1.311163e-04 2
## RegionWestern Africa 3.373524 6.625189e-02 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-20.39887 on 4 df, p=1, n=1877
## Wald test = 833.0725 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionWestern Africa exceeded. Try to increase the number of iterations by
## passing 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95 Chisq p
## (Intercept) -130.00000000 79.06178898 -630.0000000 370.000000 0 1
## Year 0.06322928 0.03917991 -0.1848394 0.310769 0 1
## RegionSouthern Africa -0.15289554 0.27007254 -6.5911993 2.802949 0 1
## RegionSouthern Asia -0.50137616 0.23177200 -12.3641743 1.784740 0 1
## RegionWestern Africa -0.46106602 0.50244411 -47.4440673 3.499484 0 1
## method
## (Intercept) 2
## Year 2
## RegionSouthern Africa 2
## RegionSouthern Asia 2
## RegionWestern Africa 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-60.15645 on 4 df, p=1, n=1877
## Wald test = 841.6061 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 114.96786210 -630.0000000 370.000000
## Year 0.06261730 0.05697248 -0.1855926 0.310063
## RegionSouthern Africa 0.07047895 0.42790563 -4.5463479 3.314323
## RegionSouthern Asia 0.69972099 0.29094416 -0.7228661 3.838253
## RegionWestern Africa 0.31898132 0.63620254 -8.7612434 4.236022
## Chisq p method
## (Intercept) 0.000000 1.000000e+00 2
## Year 0.000000 1.000000e+00 2
## RegionSouthern Africa 2.492300 1.144044e-01 2
## RegionSouthern Asia 63.179126 1.887379e-15 2
## RegionWestern Africa 1.256183 2.623749e-01 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-2.263486 on 4 df, p=1, n=1877
## Wald test = 695.9312 on 4 df, p = 0logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -47.09034721 152.92946154 -353.6408686 262.8169147
## Year 0.02133827 0.07579197 -0.1322824 0.1732344
## RegionSouthern Africa -0.21198907 0.60557530 -1.5789092 0.8785118
## RegionSouthern Asia -0.01005697 0.43669107 -0.9213354 0.8289919
## RegionWestern Africa 1.47348782 0.49872705 0.3935160 2.3969498
## Chisq p method
## (Intercept) 0.0900670734 0.764091903 2
## Year 0.0753019063 0.783768077 2
## RegionSouthern Africa 0.1262179775 0.722386058 2
## RegionSouthern Asia 0.0005197518 0.981811351 2
## RegionWestern Africa 6.6426376582 0.009956641 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=7.313081 on 4 df, p=0.1202397, n=1877
## Wald test = 531.5882 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia exceeded. Try to increase the number
## of iterations by passing 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 130.00000000 90.18028308 -370.0000000 630.0000000
## Year -0.06573042 0.04470224 -0.3137769 0.1818625
## RegionSouthern Africa -0.57785009 0.37883740 -43.4911883 1.7012637
## RegionSouthern Asia -0.47058847 0.26951153 -10.5563668 1.6210080
## RegionWestern Africa -0.29862098 0.52589872 -20.4370983 3.0991820
## Chisq p method
## (Intercept) 0.00000 1.000000e+00 2
## Year 0.00000 1.000000e+00 2
## RegionSouthern Africa 49.10809 2.422396e-12 2
## RegionSouthern Asia 56.95319 4.463097e-14 2
## RegionWestern Africa 2.77248 9.589750e-02 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-22.01399 on 4 df, p=1, n=1877
## Wald test = 801.6743 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -79.86133195 183.12319603 -471.9157131 280.3603917
## Year 0.03633018 0.09075138 -0.1422319 0.2305735
## RegionSouthern Africa 0.32211716 1.63513706 -4.6710715 3.2790074
## RegionSouthern Asia 3.49186090 0.85080529 2.0875325 5.7081797
## RegionWestern Africa 1.52994867 1.62153311 -3.4567945 4.4700332
## Chisq p method
## (Intercept) 0.17736798 0.6736450 2
## Year 0.14921118 0.6992903 2
## RegionSouthern Africa 0.03629103 0.8489157 2
## RegionSouthern Asia Inf 0.0000000 2
## RegionWestern Africa 0.65602285 0.4179675 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=48.01091 on 4 df, p=9.388562e-10, n=1877
## Wald test = 294.0508 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia, RegionWestern Africa exceeded. Try
## to increase the number of iterations by passing 'logistpl.control(maxit=...)'
## to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95 Chisq p
## (Intercept) 130.00000000 77.84694236 -370.000000 630.0000000 0 1
## Year -0.06573852 0.03858862 -0.313879 0.1817258 0 1
## RegionSouthern Africa 0.17156952 0.29129948 -41.297493 3.4329051 0 1
## RegionSouthern Asia 0.60752198 0.20149983 -3.200364 13.4421086 0 1
## RegionWestern Africa 0.06013058 0.45675179 -42.019273 21.3314949 0 1
## method
## (Intercept) 2
## Year 2
## RegionSouthern Africa 2
## RegionSouthern Asia 2
## RegionWestern Africa 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-123.3719 on 4 df, p=1, n=1877
## Wald test = 839.4505 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 121.88183870 -630.0000000 370.000000
## Year 0.06259038 0.06039869 -0.1855915 0.310053
## RegionSouthern Africa 0.32720775 0.40567347 -2.6546470 3.452844
## RegionSouthern Asia 0.51769598 0.31154462 -1.1543532 3.460549
## RegionWestern Africa -0.12299438 0.78568187 -24.6103203 3.779354
## Chisq p method
## (Intercept) 0.00000 1.000000e+00 2
## Year 0.00000 1.000000e+00 2
## RegionSouthern Africa 13.49821 2.387915e-04 2
## RegionSouthern Asia 38.35544 5.896340e-10 2
## RegionWestern Africa 0.00000 1.000000e+00 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-6.806337 on 4 df, p=1, n=1877
## Wald test = 676.9765 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95 Chisq
## (Intercept) 130.00000000 150.22702926 -370.0000000 630.0000000 0
## Year -0.06669531 0.07446882 -0.3149471 0.1806365 0
## RegionSouthern Africa 3.31188907 0.37653879 2.6648059 13.4234313 Inf
## RegionSouthern Asia 0.55438541 0.47776614 -2.8049682 10.0039977 0
## RegionWestern Africa 0.41428263 0.95750485 -20.2063161 10.3553847 0
## p method
## (Intercept) 1 2
## Year 1 2
## RegionSouthern Africa 0 2
## RegionSouthern Asia 1 2
## RegionWestern Africa 1 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=153.7819 on 4 df, p=0, n=1877
## Wald test = 474.4761 on 4 df, p = 0logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 7.420644949 170.28166884 -331.7992812 360.5309195
## Year -0.005624995 0.08439765 -0.1806764 0.1624673
## RegionSouthern Africa -0.262067625 0.60790858 -1.6330655 0.8385810
## RegionSouthern Asia -1.301046960 0.68293132 -2.9327091 -0.1105894
## RegionWestern Africa 0.551000707 0.67643559 -1.0677861 1.7171036
## Chisq p method
## (Intercept) 0.001786984 0.96628128 2
## Year 0.004181784 0.94843937 2
## RegionSouthern Africa 0.192205246 0.66108751 2
## RegionSouthern Asia 4.687770695 0.03037804 2
## RegionWestern Africa 0.571963019 0.44947992 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=6.446389 on 4 df, p=0.1681998, n=1877
## Wald test = 475.1002 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionWestern Africa exceeded. Try to increase the
## number of iterations by passing 'logistpl.control(maxit=...)' to parameter
## plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 130.00000000 106.6712283 -370.0000000 630.000000
## Year -0.06610104 0.0528774 -0.3142432 0.181374
## RegionSouthern Africa -0.03381126 0.4330638 -37.0344853 3.484746
## RegionSouthern Asia 0.65278808 0.2731829 -1.7103328 6.371464
## RegionWestern Africa -0.01048477 0.6523523 -41.4718894 6.287870
## Chisq p method
## (Intercept) 0.00000000 1.0000000 2
## Year 0.00000000 1.0000000 2
## RegionSouthern Africa 1.86500593 0.1720482 2
## RegionSouthern Asia 0.00000000 1.0000000 2
## RegionWestern Africa 0.05313081 0.8177023 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-30.6719 on 4 df, p=1, n=1877
## Wald test = 733.871 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 104.25616519 -630.0000000 370.0000000
## Year 0.06287257 0.05166456 -0.1852241 0.3103878
## RegionSouthern Africa -0.29736973 0.38731630 -10.5316925 2.6850990
## RegionSouthern Asia -0.20084701 0.29155790 -5.0752118 2.3949611
## RegionWestern Africa 0.28346335 0.51301932 -9.1103198 4.0187789
## Chisq p method
## (Intercept) 0.000000 1.0000000 2
## Year 0.000000 1.0000000 2
## RegionSouthern Africa 0.000000 1.0000000 2
## RegionSouthern Asia 0.000000 1.0000000 2
## RegionWestern Africa 1.896231 0.1685008 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-33.20581 on 4 df, p=1, n=1877
## Wald test = 754.8831 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia exceeded. Try to increase the number
## of iterations by passing 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95 Chisq
## (Intercept) 130.00000000 139.78944005 -370.0000000 630.0000000 0
## Year -0.06634971 0.06929458 -0.3144698 0.1811433 0
## RegionSouthern Africa 0.70937688 0.43512388 -2.0275186 3.3670256 0
## RegionSouthern Asia 0.22517648 0.38552872 -3.3829029 3.8223126 0
## RegionWestern Africa 0.83049534 0.58631166 -4.3707455 5.0709390 0
## p method
## (Intercept) 1 2
## Year 1 2
## RegionSouthern Africa 1 2
## RegionSouthern Asia 1 2
## RegionWestern Africa 1 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-11.41955 on 4 df, p=1, n=1877
## Wald test = 616.4021 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia, RegionWestern Africa exceeded. Try
## to increase the number of iterations by passing 'logistpl.control(maxit=...)'
## to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 87.28676591 -630.0000000 370.0000000
## Year 0.06303238 0.04325552 -0.1851036 0.3105265
## RegionSouthern Africa 0.17034884 0.28748605 -6.9121863 6.4939853
## RegionSouthern Asia 0.02727847 0.23636753 -5.1421317 6.3164135
## RegionWestern Africa 0.11146681 0.47567582 -35.7801722 7.5356595
## Chisq p method
## (Intercept) 0.0000000 1.000000e+00 2
## Year 0.0000000 1.000000e+00 2
## RegionSouthern Africa 20.0571285 7.516284e-06 2
## RegionSouthern Asia 4.3680614 3.661863e-02 2
## RegionWestern Africa 0.9916866 3.193305e-01 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-85.99555 on 4 df, p=1, n=1877
## Wald test = 827.6309 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 116.25483104 -630.0000000 370.0000000
## Year 0.06274156 0.05761037 -0.1853605 0.3102526
## RegionSouthern Africa -0.37017067 0.44900063 -12.9130466 2.5215327
## RegionSouthern Asia -0.05112558 0.31544622 -3.5529702 2.5578854
## RegionWestern Africa 0.14176805 0.61192393 -11.5104451 3.8271429
## Chisq p method
## (Intercept) 0.0000000 1.0000000 2
## Year 0.0000000 1.0000000 2
## RegionSouthern Africa 0.0000000 1.0000000 2
## RegionSouthern Asia 0.0000000 1.0000000 2
## RegionWestern Africa 0.5927603 0.4413537 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-23.81169 on 4 df, p=1, n=1877
## Wald test = 699.8399 on 4 df, p = 0logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -52.71973388 185.52307640 -426.8433204 333.55656500
## Year 0.02407423 0.09194473 -0.1674098 0.20944261
## RegionSouthern Africa 0.52545022 0.50222720 -0.5268000 1.49573070
## RegionSouthern Asia -1.13524851 0.69351045 -2.7824427 0.08920658
## RegionWestern Africa -0.87645399 1.42719622 -5.7285319 1.14044931
## Chisq p method
## (Intercept) 0.07441045 0.78502020 2
## Year 0.06319436 0.80151656 2
## RegionSouthern Africa 1.02102891 0.31227507 2
## RegionSouthern Asia 3.25231813 0.07132252 2
## RegionWestern Africa 0.49146881 0.48327283 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=6.439186 on 4 df, p=0.1686627, n=1877
## Wald test = 448.9562 on 4 df, p = 0logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -28.11944599 201.9950139 -440.1100499 395.8047393
## Year 0.01178341 0.1001114 -0.1983748 0.2159155
## RegionSouthern Africa -0.22083563 0.7190015 -1.8948708 1.0733278
## RegionSouthern Asia -0.57090132 0.6211546 -1.9728448 0.5793660
## RegionWestern Africa -0.66879246 1.4295508 -5.5241974 1.3677154
## Chisq p method
## (Intercept) 0.01757483 0.8945334 2
## Year 0.01256792 0.9107388 2
## RegionSouthern Africa 0.09620801 0.7564285 2
## RegionSouthern Asia 0.88351149 0.3472417 2
## RegionWestern Africa 0.26430732 0.6071763 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=1.08986 on 4 df, p=0.8958777, n=1877
## Wald test = 408.0509 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Asia exceeded. Try to increase the number of iterations by
## passing 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 100.3352891 -630.0000000 370.0000000
## Year 0.06291015 0.0497216 -0.1851715 0.3104293
## RegionSouthern Africa -0.34182503 0.3796909 -26.0922336 3.7194771
## RegionSouthern Asia -0.46700800 0.3043030 -46.5077127 2.8115483
## RegionWestern Africa 1.26287077 0.3538492 -2.9177119 5.9806703
## Chisq p method
## (Intercept) 0.00000 1.000000e+00 2
## Year 0.00000 1.000000e+00 2
## RegionSouthern Africa 0.00000 1.000000e+00 2
## RegionSouthern Asia 0.00000 1.000000e+00 2
## RegionWestern Africa 27.07174 1.960426e-07 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-25.15816 on 4 df, p=1, n=1877
## Wald test = 752.8781 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 128.06114845 206.4571958 -371.9388515 628.0611485
## Year -0.06586887 0.1023425 -0.3139448 0.1817288
## RegionSouthern Africa -0.33622422 0.9848800 -7.4776358 2.0333023
## RegionSouthern Asia 0.84178862 0.5226748 -0.3603506 2.7330925
## RegionWestern Africa 0.18648490 1.2167938 -6.4871674 2.8714007
## Chisq p method
## (Intercept) 0.000000 1.0000000 2
## Year 0.000000 1.0000000 2
## RegionSouthern Africa 1.834065 0.1756482 2
## RegionSouthern Asia 1.518230 0.2178876 2
## RegionWestern Africa 0.000000 1.0000000 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=2.829322 on 4 df, p=0.5867819, n=1877
## Wald test = 389.2235 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 65.93766430 230.1374922 -408.5241645 559.5755926
## Year -0.03506353 0.1140730 -0.2798187 0.2000418
## RegionSouthern Africa -1.27443921 1.4698048 -6.1558038 0.9102556
## RegionSouthern Asia -0.01607097 0.6588362 -1.4796440 1.2487212
## RegionWestern Africa 1.45169819 0.7213820 -0.2283746 2.7645777
## Chisq p method
## (Intercept) 0.0726569591 0.78750682 2
## Year 0.0836633400 0.77239301 2
## RegionSouthern Africa 1.0660679067 0.30183530 2
## RegionSouthern Asia 0.0005640451 0.98105231 2
## RegionWestern Africa 2.9993111133 0.08329993 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=4.934876 on 4 df, p=0.2940451, n=1877
## Wald test = 334.2816 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia, RegionWestern Africa exceeded. Try
## to increase the number of iterations by passing 'logistpl.control(maxit=...)'
## to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 65.85606555 -630.0000000 370.0000000
## Year 0.06329311 0.03263579 -0.1848979 0.3107595
## RegionSouthern Africa -0.11499476 0.24976947 -45.0459598 161.1690173
## RegionSouthern Asia 0.60882347 0.16787832 -1.3164568 201.2516544
## RegionWestern Africa 0.03014043 0.39273683 -45.2285140 71.1616140
## Chisq p method
## (Intercept) 0.0000000 1.0000000 2
## Year 0.0000000 1.0000000 2
## RegionSouthern Africa 0.0000000 1.0000000 2
## RegionSouthern Asia Inf 0.0000000 2
## RegionWestern Africa 0.4488681 0.5028729 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-142.6155 on 4 df, p=1, n=1877
## Wald test = 804.9451 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionWestern Africa exceeded. Try to increase the
## number of iterations by passing 'logistpl.control(maxit=...)' to parameter
## plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95 Chisq
## (Intercept) 130.00000000 117.56608006 -370.0000000 630.0000000 0
## Year -0.06620446 0.05827815 -0.3143498 0.1812633 0
## RegionSouthern Africa 0.28571022 0.42925118 -9.0596109 3.6642885 0
## RegionSouthern Asia 0.60788589 0.30353247 -1.8463481 6.4815622 0
## RegionWestern Africa 0.04502722 0.70295288 -39.0777259 6.4664769 0
## p method
## (Intercept) 1 2
## Year 1 2
## RegionSouthern Africa 1 2
## RegionSouthern Asia 1 2
## RegionWestern Africa 1 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-27.49681 on 4 df, p=1, n=1877
## Wald test = 693.7843 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 3.676462016 253.7950346 -496.3235380 503.6764620
## Year -0.005606705 0.1257882 -0.2535066 0.2421305
## RegionSouthern Africa 4.476694830 1.4479516 2.3161664 9.3602639
## RegionSouthern Asia 3.538018747 1.4480523 1.3530294 8.4215874
## RegionWestern Africa 2.642478905 1.9654650 -2.5785030 7.8634935
## Chisq p method
## (Intercept) 0.000174254 9.894678e-01 2
## Year 0.001646778 9.676303e-01 2
## RegionSouthern Africa 52.550995413 4.192202e-13 2
## RegionSouthern Asia 23.039663156 1.586934e-06 2
## RegionWestern Africa 1.390640286 2.382970e-01 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=30.8761 on 4 df, p=3.244823e-06, n=1877
## Wald test = 266.1445 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia exceeded. Try to increase the number
## of iterations by passing 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 130.00000000 106.18825133 -370.0000000 630.0000000
## Year -0.06586743 0.05263766 -0.3138909 0.1817642
## RegionSouthern Africa -0.66400028 0.44708640 -12.8294993 1.5521432
## RegionSouthern Asia -0.91382504 0.35672565 -35.1888289 0.5321249
## RegionWestern Africa -0.35259113 0.61077977 -11.8880848 2.4237312
## Chisq p method
## (Intercept) 0.000000 1.000000e+00 2
## Year 0.000000 1.000000e+00 2
## RegionSouthern Africa 38.326438 5.984633e-10 2
## RegionSouthern Asia 68.763252 1.110223e-16 2
## RegionWestern Africa 2.258624 1.328721e-01 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=6.049927 on 4 df, p=0.1954506, n=1877
## Wald test = 723.8049 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 126.55629754 226.4146315 -373.4437025 626.5562975
## Year -0.06591441 0.1122354 -0.3143073 0.1814108
## RegionSouthern Africa 1.65124887 1.0630958 -0.6986776 9.0208950
## RegionSouthern Asia 2.79917078 0.7923188 1.6598413 10.1298860
## RegionWestern Africa 1.57874856 1.4763027 -3.7763064 9.1347926
## Chisq p method
## (Intercept) 0.0000000 1.000000e+00 2
## Year 0.0000000 1.000000e+00 2
## RegionSouthern Africa 2.1108474 1.462583e-01 2
## RegionSouthern Asia 26.4430527 2.714229e-07 2
## RegionWestern Africa 0.7949796 3.725986e-01 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=20.31583 on 4 df, p=0.0004325695, n=1877
## Wald test = 293.245 on 4 df, p = 0
## Warning in logistf(k ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(k ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia, RegionWestern Africa exceeded. Try
## to increase the number of iterations by passing 'logistpl.control(maxit=...)'
## to parameter plcontrol
## logistf(formula = k ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95 Chisq p
## (Intercept) -130.00000000 50.16401953 -630.000000 370.0000000 0 1
## Year 0.06384303 0.02486061 -0.184252 0.3114046 0 1
## RegionSouthern Africa -0.31535749 0.18131644 -48.550315 5.4002944 0 1
## RegionSouthern Asia -0.26444716 0.13929372 -30.498215 5.4145905 0 1
## RegionWestern Africa -0.19376061 0.28818003 -49.093470 5.2043916 0 1
## method
## (Intercept) 2
## Year 2
## RegionSouthern Africa 2
## RegionSouthern Asia 2
## RegionWestern Africa 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-514.6524 on 4 df, p=1, n=1877
## Wald test = 519.0254 on 4 df, p = 0
logistic_sig <- as_tibble(result, rownames="variable") %>% filter(p<0.05) %>% left_join(kbayes$locus_rank)
## Joining with `by = join_by(locus)`
p<-0.02
region_2pc <- kbayes$region_est %>% select(locus, subgroup, mean) %>%
pivot_wider(id_cols=locus, names_from=subgroup, values_from = mean) %>%
mutate(d1=abs(`Eastern Africa`-`Southern Africa`),
d2=abs(`Eastern Africa`-`Western Africa`),
d3=abs(`Eastern Africa`-`Southern Asia`),
d4=abs(`Southern Africa`-`Southern Asia`),
d5=abs(`Southern Africa`-`Western Africa`),
d6=abs(`Western Africa`-`Southern Asia`)) %>%
filter(d1>p | d2>p | d3>p | d4>p | d5>p | d6>p)
Fig S3 - overlapping regional densities - log2 scale
kbayes$region_post %>%
left_join(kbayes$locus_rank) %>%
filter(rank<=30) %>%
mutate(Region=fct_relevel(subgroup,c("Western Africa", "Southern Africa", "Eastern Africa", "Southern Asia"))) %>%
ggplot() +
geom_hline(yintercept=11) +
geom_hline(yintercept=21) +
geom_density_ridges(aes(x=log2(prob), y=locus, fill=Region),
scale=1, rel_min_height = 0.01, alpha=0.5, col=NA) +
scale_y_discrete(limits=rev(kbayes$locus_rank$locus[1:30])) +
scale_x_continuous(limits=c(-13,0)) +
labs(x="log2(prevalence) per region", y="", fill="Region") +
annotate(y=region_2pc$locus, label="*", x=-1, geom="text") +
annotate(y=logistic_sig$locus, label="^", x=-0.5, geom="text") +
scale_fill_manual(values=region_cols) +
theme_bw()
## Joining with `by = join_by(locus)`
## Picking joint bandwidth of 0.123
## Warning: Removed 2676 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_text()`).

ggsave("figures/FigS3_RegionalKBayesDist.pdf", width=6, height=9)
## Picking joint bandwidth of 0.123
## Warning: Removed 2676 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_text()`).
ggsave("figures/FigS3_RegionalKBayesDist.png", width=6, height=9)
## Picking joint bandwidth of 0.123
## Warning: Removed 2676 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_text()`).
O locus analysis
read posterior estimates for adjusted counts (main) and raw counts
(for comparison)
# read adjusted Bayesian estimates for O
obayes <- parseModelledEstimates(global_post ="outputs_core/O_Full_min10_adj_28_posterior_global.csv.gz", region_post = "outputs_core/O_Full_min10_adj_28_posterior_subgroup.csv.gz", fixOnames = T)
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
# read raw Bayesian estimates for O
obayes_raw <- parseModelledEstimates(global_post ="outputs_core/O_Full_min10_raw_28_posterior_global.csv.gz", region_post = "outputs_core/O_Full_min10_raw_28_posterior_subgroup.csv.gz", fixOnames = T)
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
global and regional prevalence
o_prev_global_dist <- locus_ridgesplot(posterior=obayes$global_post,
ranks=obayes$locus_rank,
lines_every=5, maxRank=15,
xlim=c(0,30), xbreaks=seq(0,30,5), title="a) Global estimates")
## Joining with `by = join_by(locus)`
## Picking joint bandwidth of 0.0913
## Warning: Removed 118 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

o_prev_region_heatmap <- region_heatmap(estimates=obayes$region_est, global=obayes$global_est,
ranks=obayes$locus_rank, maxRank=15, median=F, title="b) Point estimates")

# fig 4a
o_prev_global_dist + o_prev_region_heatmap
## Picking joint bandwidth of 0.0913
## Warning: Removed 118 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

compare global prevalence distributions using cluster-adjusted and
raw counts
# plot raw vs adjusted distributions, overlaid - for appendix s2.4b
o_dist_raw_adj <- comparative_locus_ridgesplot(posterior1=obayes$global_post,
posterior2=obayes_raw$global_post,
ranks=obayes$locus_rank, maxRank=10,
lines_every=5, xlim=c(0,30), xbreaks=seq(0,30,5))
## Joining with `by = join_by(locus)`
## Picking joint bandwidth of 0.104
## Warning: Removed 118 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

logistic regression of O vs year + region
oresult <- c()
for (o in obayes$locus_rank$locus[1:10]) {
d <- data_NNS_sites10 %>%
mutate(o=if_else(O_genotype==o, 1, 0)) %>%
select(o, Year, Region)
m <- summary(logistf(o~Year+Region, data=d))
oresult <- rbind(oresult, cbind(coef=m$coefficients, p=m$prob, locus=o))
}
## logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 55.72275487 48.65687639 -39.59451130 151.36548583
## Year -0.02808481 0.02411652 -0.07549096 0.01915739
## RegionSouthern Africa -0.26589918 0.17803759 -0.62185989 0.07728888
## RegionSouthern Asia -0.43447514 0.14023734 -0.71286922 -0.16247693
## RegionWestern Africa 0.30925325 0.24948359 -0.19237050 0.78975161
## Chisq p method
## (Intercept) 1.311849 0.252060164 2
## Year 1.356577 0.244131576 2
## RegionSouthern Africa 2.289580 0.130245138 2
## RegionSouthern Asia 9.915732 0.001638719 2
## RegionWestern Africa 1.491167 0.222035428 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=19.76395 on 4 df, p=0.0005559294, n=1877
## Wald test = 420.5418 on 4 df, p = 0
## Warning in logistf(o ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(o ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year exceeded.
## Try to increase the number of iterations by passing
## 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 44.74228687 -630.0000000 370.0000000
## Year 0.06411214 0.02217449 -0.1840187 0.3117042
## RegionSouthern Africa -0.09329761 0.15613474 -4.1012937 2.4814756
## RegionSouthern Asia -0.29350871 0.12410411 -4.4543442 1.5381654
## RegionWestern Africa 0.04044815 0.24579937 -8.5537389 4.3857690
## Chisq p method
## (Intercept) 0.0000000 1.0000000 2
## Year 0.0000000 1.0000000 2
## RegionSouthern Africa 0.0000000 1.0000000 2
## RegionSouthern Asia 0.0000000 1.0000000 2
## RegionWestern Africa 0.8393326 0.3595877 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-190.2889 on 4 df, p=1, n=1877
## Wald test = 202.5556 on 4 df, p = 0
## Warning in logistf(o ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(o ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia, RegionWestern Africa exceeded. Try
## to increase the number of iterations by passing 'logistpl.control(maxit=...)'
## to parameter plcontrol
## logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95 Chisq
## (Intercept) 130.00000000 42.9719403 -370.0000000 630.0000000 0.00000
## Year -0.06453396 0.0212985 -0.3124635 0.1830676 0.00000
## RegionSouthern Africa -0.08209284 0.1526063 -6.2858468 1.9348022 15.91919
## RegionSouthern Asia -0.20637185 0.1183976 -6.4973577 1.1423861 60.01016
## RegionWestern Africa -0.20768016 0.2414996 -17.8701351 4.1530075 6.88447
## p method
## (Intercept) 1.000000e+00 2
## Year 1.000000e+00 2
## RegionSouthern Africa 6.610495e-05 2
## RegionSouthern Asia 9.436896e-15 2
## RegionWestern Africa 8.694785e-03 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-137.7519 on 4 df, p=1, n=1877
## Wald test = 65.01845 on 4 df, p = 2.550182e-13logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -48.26043537 67.94536038 -182.34702833 85.14272175
## Year 0.02251919 0.03367381 -0.04360084 0.08896741
## RegionSouthern Africa 0.96357207 0.23851992 0.48820153 1.42660033
## RegionSouthern Asia 1.83299227 0.17662010 1.49100642 2.18517899
## RegionWestern Africa -0.57799418 0.65583704 -2.16592253 0.51819343
## Chisq p method
## (Intercept) 0.5019927 0.4786258406 2
## Year 0.4449356 0.5047498289 2
## RegionSouthern Africa 15.1209247 0.0001008394 2
## RegionSouthern Asia Inf 0.0000000000 2
## RegionWestern Africa 0.9168523 0.3383028256 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=147.9369 on 4 df, p=0, n=1877
## Wald test = 666.9666 on 4 df, p = 0
## Warning in logistf(o ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(o ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Asia, RegionWestern Africa exceeded. Try to increase the number
## of iterations by passing 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 54.58600970 -630.0000000 370.0000000
## Year 0.06365555 0.02705153 -0.1844764 0.3111785
## RegionSouthern Africa -0.19235119 0.19694217 -18.8674877 4.5468653
## RegionSouthern Asia -0.02903790 0.14843972 -4.6562358 4.4451879
## RegionWestern Africa 0.14718588 0.29376962 -22.7968693 5.4840734
## Chisq p method
## (Intercept) 0.000000 1.00000000 2
## Year 0.000000 1.00000000 2
## RegionSouthern Africa 0.000000 1.00000000 2
## RegionSouthern Asia 0.000000 1.00000000 2
## RegionWestern Africa 3.733312 0.05333754 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-248.1846 on 4 df, p=1, n=1877
## Wald test = 661.2743 on 4 df, p = 0logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 74.29619218 125.88249011 -177.30329704 325.51725457
## Year -0.03872959 0.06239698 -0.16327291 0.08596303
## RegionSouthern Africa -0.02298928 0.53886400 -1.21235318 0.95619149
## RegionSouthern Asia 0.85552298 0.32531599 0.20865445 1.50033233
## RegionWestern Africa 1.08745681 0.52935370 -0.08820886 2.04304632
## Chisq p method
## (Intercept) 0.336144039 0.562063457 2
## Year 0.371791359 0.542029440 2
## RegionSouthern Africa 0.001815434 0.966014093 2
## RegionSouthern Asia 6.661335030 0.009852707 2
## RegionWestern Africa 3.348037206 0.067285199 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=9.603924 on 4 df, p=0.04765508, n=1877
## Wald test = 623.2345 on 4 df, p = 0
## Warning in logistf(o ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 84.79724993 129.76090576 -173.6805811 345.20609397
## Year -0.04395748 0.06432027 -0.1730579 0.08414509
## RegionSouthern Africa 2.36814626 0.31453954 1.7624901 3.00841585
## RegionSouthern Asia 0.35014324 0.37621814 -0.4183043 1.08016764
## RegionWestern Africa -1.11436778 1.42969126 -5.9629769 0.88233808
## Chisq p method
## (Intercept) 0.4122658 0.5208217 2
## Year 0.4509773 0.5018712 2
## RegionSouthern Africa Inf 0.0000000 2
## RegionSouthern Asia 0.8300018 0.3622720 2
## RegionWestern Africa 0.8737337 0.3499237 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=76.57055 on 4 df, p=8.881784e-16, n=1877
## Wald test = 580.9497 on 4 df, p = 0
## Warning in logistf(o ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(o ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia, RegionWestern Africa exceeded. Try
## to increase the number of iterations by passing 'logistpl.control(maxit=...)'
## to parameter plcontrol
## logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -105.1643788 612.1606066 -605.1643788 394.8356212
## Year 0.0484608 0.3033649 -0.1994038 0.2961592
## RegionSouthern Africa 1.2066802 1.9147514 -4.2356803 4.9726518
## RegionSouthern Asia 0.6785635 1.8239416 -4.7948089 5.4484336
## RegionWestern Africa 2.5649350 1.7590046 -2.6717788 7.8519283
## Chisq p method
## (Intercept) 0.004445785 0.9468391 2
## Year 0.003942428 0.9499347 2
## RegionSouthern Africa 0.652709494 0.4191456 2
## RegionSouthern Asia 0.200708690 0.6541494 2
## RegionWestern Africa 1.327986488 0.2491637 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=1.502643 on 4 df, p=0.8261731, n=1877
## Wald test = 99.40515 on 4 df, p = 0
## Warning in logistf(o ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(o ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Africa, RegionSouthern Asia exceeded. Try to increase the number
## of iterations by passing 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 81.57949839 -630.0000000 370.0000000
## Year 0.06304831 0.04042727 -0.1851406 0.3105105
## RegionSouthern Africa 0.35632392 0.26744907 -5.3555231 10.7892712
## RegionSouthern Asia 0.35756699 0.21426960 -2.3085569 11.5892249
## RegionWestern Africa 0.47654275 0.40913946 -16.8466748 13.1276275
## Chisq p method
## (Intercept) 0.000000 1.000000e+00 2
## Year 0.000000 1.000000e+00 2
## RegionSouthern Africa 48.124693 3.999578e-12 2
## RegionSouthern Asia Inf 0.000000e+00 2
## RegionWestern Africa 6.760224 9.321205e-03 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-76.63685 on 4 df, p=1, n=1877
## Wald test = 834.1605 on 4 df, p = 0
## Warning in logistf(o ~ Year + Region, data = d): logistf.fit: Maximum number of
## iterations for full model exceeded. Try to increase the number of iterations or
## alter step size by passing 'logistf.control(maxit=..., maxstep=...)' to
## parameter control
## Warning in logistf(o ~ Year + Region, data = d): Nonconverged PL confidence
## limits: maximum number of iterations for variables: (Intercept), Year,
## RegionSouthern Asia exceeded. Try to increase the number of iterations by
## passing 'logistpl.control(maxit=...)' to parameter plcontrol
## logistf(formula = o ~ Year + Region, data = d)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) -130.00000000 245.9853743 -630.0000000 370.0000000
## Year 0.06189375 0.1218978 -0.1862013 0.3094002
## RegionSouthern Africa -0.04477779 0.9096706 -12.9112451 5.7649779
## RegionSouthern Asia 0.13053122 0.6785308 -4.5506945 5.7846867
## RegionWestern Africa 1.36140326 0.8426392 -2.7214082 7.8420568
## Chisq p method
## (Intercept) 0.000000 1.00000000 2
## Year 0.000000 1.00000000 2
## RegionSouthern Africa 0.000000 1.00000000 2
## RegionSouthern Asia 2.144623 0.14307015 2
## RegionWestern Africa 4.730389 0.02963417 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=-2.691265 on 4 df, p=1, n=1877
## Wald test = 329.4833 on 4 df, p = 0
logistic_sig_o <- as_tibble(oresult, rownames="variable") %>% filter(p<0.05) %>% left_join(obayes$locus_rank)
## Joining with `by = join_by(locus)`
p<-0.05
region_5pc_o <- obayes$region_est %>% select(locus, subgroup, mean) %>%
pivot_wider(id_cols=locus, names_from=subgroup, values_from = mean) %>%
mutate(d1=abs(`Eastern Africa`-`Southern Africa`),
d2=abs(`Eastern Africa`-`Western Africa`),
d3=abs(`Eastern Africa`-`Southern Asia`),
d4=abs(`Southern Africa`-`Southern Asia`),
d5=abs(`Southern Africa`-`Western Africa`),
d6=abs(`Western Africa`-`Southern Asia`)) %>%
filter(d1>p | d2>p | d3>p | d4>p | d5>p | d6>p)
Fig S5 - overlapping regional densities for O types - log2
scale
obayes$region_post %>%
left_join(obayes$locus_rank) %>%
filter(rank<=10) %>%
mutate(Region=fct_relevel(subgroup,c("Western Africa", "Southern Africa", "Eastern Africa", "Southern Asia"))) %>%
ggplot() +
geom_hline(yintercept=11) +
geom_hline(yintercept=21) +
geom_density_ridges(aes(x=log2(prob), y=locus, fill=Region),
scale=1, rel_min_height = 0.01, alpha=0.5, col=NA) +
scale_y_discrete(limits=rev(obayes$locus_rank$locus[1:10])) +
scale_x_continuous(limits=c(-13,0)) +
labs(x="log2(prevalence) per region", y="", fill="Region") +
annotate(y=region_5pc_o$locus, label="*", x=-1, geom="text") +
annotate(y=logistic_sig_o$locus, label="^", x=-0.5, geom="text") +
scale_fill_manual(values=region_cols) +
theme_bw()
## Joining with `by = join_by(locus)`
## Picking joint bandwidth of 0.0644
## Warning: Removed 54 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

ggsave("figures/FigS5_RegionalOBayesDist.pdf", width=6, height=5)
## Picking joint bandwidth of 0.0644
## Warning: Removed 54 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
ggsave("figures/FigS5_RegionalOBayesDist.png", width=6, height=5)
## Picking joint bandwidth of 0.0644
## Warning: Removed 54 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
Appendix Fig S1.3 = crude annual K rate per region
K_crude_perRegion_perYear <- data_NNS_sites10 %>%
raw_adj_prop(grouping_vars = c("K_locus", "Region", "Year"), summarise_by = "Region", adj_vars = c("Cluster", "Region", "Year"))
## grouping_var: Region in adj_vars, removing from adj_vars
## grouping_var: Year in adj_vars, removing from adj_vars
## Grouping vars: K_locus, Region, Year
## Summarising by: Region
## Adj vars: Cluster
K_crude_perRegion_perYear <- K_crude_perRegion_perYear %>% group_by(Region, Year) %>%
summarise(n=sum(adj_count)) %>% left_join(K_crude_perRegion_perYear) %>%
mutate(adj_prop = adj_count/n) %>% select(-raw_sum, -adj_sum, -raw_prop, -raw_count)
## `summarise()` has grouped output by 'Region'. You can override using the
## `.groups` argument.
## Joining with `by = join_by(Region, Year)`
K_crude_perRegion_perYear %>%
left_join(kbayes$locus_rank %>% rename(K_locus=locus)) %>%
filter(rank <=30 | K_locus %in% c("KL116", "KL53", "KL81")) %>%
ggplot(aes(x=Year, fill=Region, y=adj_prop)) +
geom_col() +
theme_bw() +
theme(axis.text.x = element_text(angle=45, hjust=1, size=8),
axis.text.y=element_text(size=10)) +
facet_wrap(~rank+K_locus, scales="free_y") +
theme(legend.position="bottom") +
scale_fill_manual(values=region_cols)
## Joining with `by = join_by(K_locus)`

ggsave("figures/AppendixFigS1.3_crudeRegionAnnualRateK.pdf", width=7, height=9)
ggsave("figures/AppendixFigS1.3_crudeRegionAnnualRateK.png", width=7, height=9)
Appendix Fig S1.4 - simple est per year per O/region
O_crude_perRegion_perYear <- data_NNS_sites10 %>%
raw_adj_prop(grouping_vars = c("O_genotype", "Region", "Year"), summarise_by = "Region", adj_vars = c("Cluster", "Region", "Year"))
## grouping_var: Region in adj_vars, removing from adj_vars
## grouping_var: Year in adj_vars, removing from adj_vars
## Grouping vars: O_genotype, Region, Year
## Summarising by: Region
## Adj vars: Cluster
O_crude_perRegion_perYear <- O_crude_perRegion_perYear %>% group_by(Region, Year) %>%
summarise(n=sum(adj_count)) %>% left_join(O_crude_perRegion_perYear) %>%
mutate(adj_prop = adj_count/n) %>% select(-raw_sum, -adj_sum, -raw_prop, -raw_count)
## `summarise()` has grouped output by 'Region'. You can override using the
## `.groups` argument.
## Joining with `by = join_by(Region, Year)`
O_crude_perRegion_perYear %>%
left_join(obayes$locus_rank %>% rename(O_genotype=locus)) %>%
#filter(rank <=12) %>%
ggplot(aes(x=Year, fill=Region, y=adj_prop)) +
geom_col() +
theme_bw() +
theme(axis.text.x = element_text(angle=45, hjust=1, size=10),
axis.text.y=element_text(size=10)) +
facet_wrap(~rank+O_genotype, scales="free_y", ncol=3) +
theme(legend.position="bottom") +
scale_fill_manual(values=region_cols) + labs(fill="")
## Joining with `by = join_by(O_genotype)`

ggsave("figures/AppendixFigS1.4_crudeRegionAnnualRateO.pdf", width=6, height=8)
ggsave("figures/AppendixFigS1.4_crudeRegionAnnualRateO.png", width=6, height=8)
effect of clustering - modelled estimates
kbayes_global <- full_join(kbayes$global_est, kbayes_raw$global_est, by="locus", suffix=c(".adj", ".raw"))
bayes_global_K_raw_vs_adj <- kbayes_global %>%
ggplot(aes(x=mean.raw*100, y=mean.adj*100)) +
geom_point() + theme_bw() +
labs(x="Bayesian estimate (%), raw", y="Bayesian estimate (%), cluster-adjusted") +
geom_abline(slope=1, intercept=0) +
geom_text(data=kbayes_global[kbayes_global$locus %in% kbayes$locus_rank$locus[1:5],], aes(x=mean.raw*100, y=mean.adj*100, label=locus), hjust=1, nudge_x=-0.2, size=2.5)
obayes_global <- full_join(obayes$global_est, obayes_raw$global_est, by="locus", suffix=c(".adj", ".raw"))
bayes_global_O_raw_vs_adj <- obayes_global %>%
ggplot(aes(x=mean.raw*100, y=mean.adj*100)) +
geom_point() + theme_bw() +
labs(x="Bayesian estimate (%), raw", y="Bayesian estimate (%), cluster-adjusted") +
geom_abline(slope=1, intercept=0) +
geom_text(data=obayes_global[obayes_global$locus %in% obayes$locus_rank$locus[1:9],], aes(x=mean.raw*100, y=mean.adj*100, label=locus), hjust=1, nudge_x=-0.2, size=2.5)
Appendix Figure S2.4 - cluster vs raw, global K
( (bayes_global_K_raw_vs_adj + ggtitle("a) K locus (mean estimate)") ) + (bayes_global_O_raw_vs_adj + ggtitle("b) O type (mean estimate)")) + patchwork::plot_layout(axes="collect") ) / (k_dist_raw_adj + ggtitle ("c) K locus (posterior distribution)") + o_dist_raw_adj + ggtitle ("d) O type (posterior distribution)") + patchwork::plot_layout(guides="collect")) + patchwork::plot_layout(heights=c(1,2)) & theme(legend.position='bottom')
## Picking joint bandwidth of 0.0595
## Warning: Removed 162 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Picking joint bandwidth of 0.104
## Warning: Removed 118 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

ggsave("figures/AppendixFigS2.4_clusterVsRaw.pdf", width=8, height=11)
## Picking joint bandwidth of 0.0595
## Warning: Removed 162 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Picking joint bandwidth of 0.104
## Warning: Removed 118 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
ggsave("figures/AppendixFigS2.4_clusterVsRaw.png", width=8, height=11)
## Picking joint bandwidth of 0.0595
## Warning: Removed 162 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Picking joint bandwidth of 0.104
## Warning: Removed 118 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
Effect of cluster adjustment on crude proportion per site
K_crude_perSite <- data_NNS_sites10 %>%
raw_adj_prop(grouping_vars = c("K_locus", "Site"), summarise_by = "Site", adj_vars = c("Cluster", "Site")) %>%
mutate(raw_prop = raw_count/raw_sum) %>%
mutate(adj_prop = adj_count/adj_sum)
## grouping_var: Site in adj_vars, removing from adj_vars
## Grouping vars: K_locus, Site
## Summarising by: Site
## Adj vars: Cluster
O_crude_perSite <- data_NNS_sites10 %>%
raw_adj_prop(grouping_vars = c("O_genotype", "Site"), summarise_by = "Site", adj_vars = c("Cluster", "Site")) %>%
mutate(raw_prop = raw_count/raw_sum) %>%
mutate(adj_prop = adj_count/adj_sum)
## grouping_var: Site in adj_vars, removing from adj_vars
## Grouping vars: O_genotype, Site
## Summarising by: Site
## Adj vars: Cluster
# KL25
K_crude_raw_adj_scatter_KL25 <- K_crude_perSite %>%
filter(K_locus=="KL25") %>%
ggplot(aes(x=raw_prop, y=adj_prop)) +
geom_point() +
geom_abline(slope=1, intercept=0) +
theme_bw() +
labs(x="Crude proportion, raw", y="Crude proportion, adjusted")
# KL2
K_crude_raw_adj_scatter_KL2 <- K_crude_perSite %>%
filter(K_locus=="KL2") %>%
ggplot(aes(x=raw_prop, y=adj_prop)) +
geom_point() +
geom_abline(slope=1, intercept=0) +
theme_bw() +
labs(x="Crude proportion, raw", y="Crude proportion, adjusted")
# KL102
K_crude_raw_adj_scatter_KL102 <- K_crude_perSite %>%
filter(K_locus=="KL102") %>%
ggplot(aes(x=raw_prop, y=adj_prop)) +
geom_point() +
geom_abline(slope=1, intercept=0) +
theme_bw() +
labs(x="Crude proportion, raw", y="Crude proportion, adjusted")
# KL62
K_crude_raw_adj_scatter_KL62 <- K_crude_perSite %>%
filter(K_locus=="KL62") %>%
ggplot(aes(x=raw_prop, y=adj_prop)) +
geom_point() +
geom_abline(slope=1, intercept=0) +
theme_bw() +
labs(x="Crude proportion, raw", y="Crude proportion, adjusted")
# O1v1
O_crude_raw_adj_scatter_O1v1 <- O_crude_perSite %>%
filter(O_genotype=="O1.v1") %>%
ggplot(aes(x=raw_prop, y=adj_prop)) +
geom_point() +
geom_abline(slope=1, intercept=0) +
theme_bw() +
labs(x="Crude proportion, raw", y="Crude proportion, adjusted")
# O1v2
O_crude_raw_adj_scatter_O1v2 <- O_crude_perSite %>%
filter(O_genotype=="O1.v2") %>%
ggplot(aes(x=raw_prop, y=adj_prop)) +
geom_point() +
geom_abline(slope=1, intercept=0) +
theme_bw() +
labs(x="Crude proportion, raw", y="Crude proportion, adjusted")
# O2afg
O_crude_raw_adj_scatter_O2afg <- O_crude_perSite %>%
filter(O_genotype=="O2afg") %>%
ggplot(aes(x=raw_prop, y=adj_prop)) +
geom_point() +
geom_abline(slope=1, intercept=0) +
theme_bw() +
labs(x="Crude proportion, raw", y="Crude proportion, adjusted")
# O4
O_crude_raw_adj_scatter_O4 <- O_crude_perSite %>%
filter(O_genotype=="O4") %>%
ggplot(aes(x=raw_prop, y=adj_prop)) +
geom_point() +
geom_abline(slope=1, intercept=0) +
theme_bw() +
labs(x="Crude proportion, raw", y="Crude proportion, adjusted")
Appendix Fig S2.3 - Effect of cluster adjustment on crude
proportion
(K_crude_raw_adj_scatter_KL102 + ggtitle("a) KL102, per-site") + K_crude_raw_adj_scatter_KL2 + ggtitle("b) KL2, per-site")) / (K_crude_raw_adj_scatter_KL25 + ggtitle("c) KL25, per-site") + K_crude_raw_adj_scatter_KL62 + ggtitle("d) KL62, per-site")) / (O_crude_raw_adj_scatter_O1v1 + ggtitle("e) O1v1, per-site") + O_crude_raw_adj_scatter_O1v2 + ggtitle("f) O1v2, per-site")) / (O_crude_raw_adj_scatter_O2afg + ggtitle("g) O2afg, per-site") + O_crude_raw_adj_scatter_O4 + ggtitle("h) O4, per-site"))

ggsave("figures/AppendixFigS2.3_EffectClusteringCrude.pdf", width=8, height=10)
ggsave("figures/AppendixFigS2.3_EffectClusteringCrude.png", width=8, height=10)
numbers for text - K prevalence estimates
kbayes$global_est
## # A tibble: 90 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL2 0.0894 0.0897 0.0711 0.110 1
## 2 KL102 0.0869 0.0872 0.0683 0.107 2
## 3 KL25 0.0855 0.0858 0.0678 0.106 3
## 4 KL15 0.0539 0.0543 0.0396 0.0708 4
## 5 KL62 0.0464 0.0468 0.0332 0.0624 5
## 6 KL30 0.0325 0.0328 0.0218 0.0459 6
## 7 KL10 0.0261 0.0265 0.0166 0.0388 7
## 8 KL17 0.0248 0.0252 0.0155 0.0370 8
## 9 KL23 0.0248 0.0252 0.0157 0.0369 9
## 10 KL51 0.0237 0.0240 0.0147 0.0355 10
## # ℹ 80 more rows
kbayes$global_est %>% filter(rank<=5)
## # A tibble: 5 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL2 0.0894 0.0897 0.0711 0.110 1
## 2 KL102 0.0869 0.0872 0.0683 0.107 2
## 3 KL25 0.0855 0.0858 0.0678 0.106 3
## 4 KL15 0.0539 0.0543 0.0396 0.0708 4
## 5 KL62 0.0464 0.0468 0.0332 0.0624 5
kbayes$global_est %>% filter(median>0.02)
## # A tibble: 13 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL2 0.0894 0.0897 0.0711 0.110 1
## 2 KL102 0.0869 0.0872 0.0683 0.107 2
## 3 KL25 0.0855 0.0858 0.0678 0.106 3
## 4 KL15 0.0539 0.0543 0.0396 0.0708 4
## 5 KL62 0.0464 0.0468 0.0332 0.0624 5
## 6 KL30 0.0325 0.0328 0.0218 0.0459 6
## 7 KL10 0.0261 0.0265 0.0166 0.0388 7
## 8 KL17 0.0248 0.0252 0.0155 0.0370 8
## 9 KL23 0.0248 0.0252 0.0157 0.0369 9
## 10 KL51 0.0237 0.0240 0.0147 0.0355 10
## 11 KL112 0.0236 0.0240 0.0147 0.0356 11
## 12 KL24 0.0223 0.0227 0.0135 0.0341 12
## 13 KL149 0.0212 0.0215 0.0128 0.0326 13
kbayes$global_est %>% filter(median>0.01)
## # A tibble: 27 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL2 0.0894 0.0897 0.0711 0.110 1
## 2 KL102 0.0869 0.0872 0.0683 0.107 2
## 3 KL25 0.0855 0.0858 0.0678 0.106 3
## 4 KL15 0.0539 0.0543 0.0396 0.0708 4
## 5 KL62 0.0464 0.0468 0.0332 0.0624 5
## 6 KL30 0.0325 0.0328 0.0218 0.0459 6
## 7 KL10 0.0261 0.0265 0.0166 0.0388 7
## 8 KL17 0.0248 0.0252 0.0155 0.0370 8
## 9 KL23 0.0248 0.0252 0.0157 0.0369 9
## 10 KL51 0.0237 0.0240 0.0147 0.0355 10
## # ℹ 17 more rows
kbayes$global_est %>% filter(locus=="KL149")
## # A tibble: 1 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL149 0.0212 0.0215 0.0128 0.0326 13
kbayes$region_est %>% filter(locus=="KL149")
## # A tibble: 4 × 6
## # Groups: locus [1]
## locus subgroup median mean lower upper
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 KL149 Eastern Africa 0.00433 0.00510 0.000878 0.0134
## 2 KL149 Southern Africa 0.0851 0.0867 0.0475 0.136
## 3 KL149 Southern Asia 0.00609 0.00720 0.00112 0.0197
## 4 KL149 Western Africa 0.00573 0.00876 0.000475 0.0344
kbayes$global_est %>% filter(locus=="KL15")
## # A tibble: 1 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL15 0.0539 0.0543 0.0396 0.0708 4
kbayes$region_est %>% filter(locus=="KL15")
## # A tibble: 4 × 6
## # Groups: locus [1]
## locus subgroup median mean lower upper
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 KL15 Eastern Africa 0.0495 0.0504 0.0303 0.0753
## 2 KL15 Southern Africa 0.00923 0.0109 0.00180 0.0294
## 3 KL15 Southern Asia 0.0916 0.0926 0.0593 0.132
## 4 KL15 Western Africa 0.0321 0.0369 0.00713 0.0933
kbayes$global_est %>% filter(locus=="KL51")
## # A tibble: 1 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL51 0.0237 0.0240 0.0147 0.0355 10
kbayes$region_est %>% filter(locus=="KL51")
## # A tibble: 4 × 6
## # Groups: locus [1]
## locus subgroup median mean lower upper
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 KL51 Eastern Africa 0.00380 0.00455 0.000650 0.0127
## 2 KL51 Southern Africa 0.00225 0.00348 0.000176 0.0134
## 3 KL51 Southern Asia 0.0697 0.0712 0.0425 0.108
## 4 KL51 Western Africa 0.00431 0.00694 0.000316 0.0295
kbayes$global_est %>% filter(locus=="KL81")
## # A tibble: 1 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL81 0.0123 0.0126 0.00612 0.0214 25
kbayes$region_est %>% filter(locus=="KL81")
## # A tibble: 4 × 6
## # Groups: locus [1]
## locus subgroup median mean lower upper
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 KL81 Eastern Africa 0.00162 0.00224 0.000153 0.00804
## 2 KL81 Southern Africa 0.00149 0.00251 0.0000947 0.0109
## 3 KL81 Southern Asia 0.0357 0.0369 0.0169 0.0637
## 4 KL81 Western Africa 0.00268 0.00494 0.000146 0.0231
kbayes$global_est %>% filter(locus=="KL28")
## # A tibble: 1 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL28 0.0136 0.0140 0.00706 0.0229 22
kbayes$region_est %>% filter(locus=="KL28")
## # A tibble: 4 × 6
## # Groups: locus [1]
## locus subgroup median mean lower upper
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 KL28 Eastern Africa 0.0134 0.0142 0.00508 0.0279
## 2 KL28 Southern Africa 0.00607 0.00777 0.000918 0.0237
## 3 KL28 Southern Asia 0.00369 0.00475 0.000506 0.0149
## 4 KL28 Western Africa 0.0632 0.0679 0.0208 0.141
kbayes$global_est %>% filter(locus=="KL116")
## # A tibble: 1 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL116 0.00599 0.00634 0.00218 0.0124 35
kbayes$region_est %>% filter(locus=="KL116")
## # A tibble: 4 × 6
## # Groups: locus [1]
## locus subgroup median mean lower upper
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 KL116 Eastern Africa 0.00167 0.00230 0.000172 0.00808
## 2 KL116 Southern Africa 0.00159 0.00253 0.000112 0.0104
## 3 KL116 Southern Asia 0.00218 0.00308 0.000202 0.0110
## 4 KL116 Western Africa 0.0519 0.0565 0.0149 0.124
kbayes$global_est %>% filter(locus=="KL53")
## # A tibble: 1 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 KL53 0.00466 0.00506 0.00140 0.0110 41
kbayes$region_est %>% filter(locus=="KL53")
## # A tibble: 4 × 6
## # Groups: locus [1]
## locus subgroup median mean lower upper
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 KL53 Eastern Africa 0.00150 0.00209 0.000140 0.00737
## 2 KL53 Southern Africa 0.00138 0.00231 0.0000939 0.0102
## 3 KL53 Southern Asia 0.00196 0.00285 0.000171 0.0104
## 4 KL53 Western Africa 0.0361 0.0410 0.00794 0.101
numbers for text - O antigen
obayes$global_est
## # A tibble: 15 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 O1.v1 0.263 0.264 0.234 0.294 1
## 2 O1.v2 0.219 0.219 0.191 0.249 2
## 3 O2afg 0.159 0.159 0.134 0.185 3
## 4 O4 0.0853 0.0856 0.0673 0.106 4
## 5 O2a 0.0641 0.0645 0.0489 0.0824 5
## 6 O3b 0.0576 0.0581 0.0429 0.0760 6
## 7 O5 0.0563 0.0568 0.0419 0.0737 7
## 8 OL13 0.0501 0.0506 0.0365 0.0669 8
## 9 O3/O3a 0.0299 0.0303 0.0197 0.0430 9
## 10 OL104 0.00340 0.00380 0.000794 0.00918 10
## 11 OL103 0.00212 0.00253 0.000327 0.00694 11
## 12 O12 0.00211 0.00250 0.000323 0.00690 12
## 13 OL102 0.000873 0.00127 0.0000343 0.00464 13
## 14 O2a.v3 0.000868 0.00127 0.0000351 0.00477 14
## 15 O1.v3 0.000873 0.00126 0.0000291 0.00468 15
obayes$global_est %>% filter(rank<=5) %>% pull(median) %>% sum()
## [1] 0.7905593
obayes$global_est %>% filter(rank<=5) %>% pull(lower) %>% sum()
## [1] 0.6757192
obayes$global_est %>% filter(rank<=5) %>% pull(upper) %>% sum()
## [1] 0.9163594
obayes$global_est
## # A tibble: 15 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 O1.v1 0.263 0.264 0.234 0.294 1
## 2 O1.v2 0.219 0.219 0.191 0.249 2
## 3 O2afg 0.159 0.159 0.134 0.185 3
## 4 O4 0.0853 0.0856 0.0673 0.106 4
## 5 O2a 0.0641 0.0645 0.0489 0.0824 5
## 6 O3b 0.0576 0.0581 0.0429 0.0760 6
## 7 O5 0.0563 0.0568 0.0419 0.0737 7
## 8 OL13 0.0501 0.0506 0.0365 0.0669 8
## 9 O3/O3a 0.0299 0.0303 0.0197 0.0430 9
## 10 OL104 0.00340 0.00380 0.000794 0.00918 10
## 11 OL103 0.00212 0.00253 0.000327 0.00694 11
## 12 O12 0.00211 0.00250 0.000323 0.00690 12
## 13 OL102 0.000873 0.00127 0.0000343 0.00464 13
## 14 O2a.v3 0.000868 0.00127 0.0000351 0.00477 14
## 15 O1.v3 0.000873 0.00126 0.0000291 0.00468 15
numbers for text - O differences by region
# where is O4 found
data_NNS_sites10 %>% filter(O_genotype=="O4") %>% group_by(Region, Study, Site, ST, K_locus) %>% count() %>% arrange(-n)
## # A tibble: 50 × 6
## # Groups: Region, Study, Site, ST, K_locus [50]
## Region Study Site ST K_locus n
## <chr> <chr> <dbl> <chr> <chr> <int>
## 1 Southern Asia CHRF 40 ST11 KL15 59
## 2 Eastern Africa BARNARDS 20 ST37 KL15 28
## 3 Southern Africa BabyGERMS 30 ST152 KL149 16
## 4 Southern Asia CHRF 40 ST502 KL15 15
## 5 Southern Asia NeoOBS-India 5 ST11 KL15 14
## 6 Southern Africa BabyGERMS 31 ST405 KL151 9
## 7 Eastern Africa MLW 36 ST45 KL15 8
## 8 Southern Asia CHRF 41 ST11 KL15 7
## 9 Southern Asia CHRF 40 ST437 KL36 6
## 10 Southern Asia DH 2 ST11 KL15 6
## # ℹ 40 more rows
data_NNS_sites10 %>% filter(O_genotype=="O4" & Region=="Southern Asia") %>% group_by(ST, K_locus) %>% count() %>% arrange(-n)
## # A tibble: 16 × 3
## # Groups: ST, K_locus [16]
## ST K_locus n
## <chr> <chr> <int>
## 1 ST11 KL15 87
## 2 ST502 KL15 15
## 3 ST437 KL36 9
## 4 ST340 KL15 5
## 5 ST152 KL149 3
## 6 ST273 KL15 3
## 7 ST2258 KL131 2
## 8 ST37 KL15 2
## 9 ST231 KL144 1
## 10 ST273 KL141 1
## 11 ST3623 KL15 1
## 12 ST392 KL27 1
## 13 ST429 KL27 1
## 14 ST70 KL15 1
## 15 ST726 KL15 1
## 16 ST883-1LV KL15 1
data_NNS_sites10 %>% filter(O_genotype=="O4" & Region=="Southern Asia") %>% group_by(ST, K_locus, Site) %>% count() %>% arrange(-n)
## # A tibble: 26 × 4
## # Groups: ST, K_locus, Site [26]
## ST K_locus Site n
## <chr> <chr> <dbl> <int>
## 1 ST11 KL15 40 59
## 2 ST502 KL15 40 15
## 3 ST11 KL15 5 14
## 4 ST11 KL15 41 7
## 5 ST11 KL15 2 6
## 6 ST437 KL36 40 6
## 7 ST152 KL149 40 3
## 8 ST273 KL15 40 3
## 9 ST340 KL15 38 2
## 10 ST340 KL15 40 2
## # ℹ 16 more rows
data_NNS_sites10 %>% filter(O_genotype=="O2a.v1") %>% group_by(Region, Study, Site, ST, K_locus) %>% count() %>% arrange(-n)
## # A tibble: 0 × 6
## # Groups: Region, Study, Site, ST, K_locus [0]
## # ℹ 6 variables: Region <chr>, Study <chr>, Site <dbl>, ST <chr>,
## # K_locus <chr>, n <int>
data_NNS_sites10 %>% filter(O_genotype=="O2a.v1" & Region=="Southern Asia") %>% group_by(ST, K_locus) %>% count() %>% arrange(-n)
## # A tibble: 0 × 3
## # Groups: ST, K_locus [0]
## # ℹ 3 variables: ST <chr>, K_locus <chr>, n <int>
data_NNS_sites10 %>% filter(O_genotype=="O2a.v1" & Region=="Southern Asia") %>% group_by(ST, K_locus, Site) %>% count() %>% arrange(-n)
## # A tibble: 0 × 4
## # Groups: ST, K_locus, Site [0]
## # ℹ 4 variables: ST <chr>, K_locus <chr>, Site <dbl>, n <int>
data_NNS_sites10 %>% filter(O_genotype=="O5") %>% group_by(Region, Study, Site, ST, K_locus) %>% count() %>% arrange(-n)
## # A tibble: 21 × 6
## # Groups: Region, Study, Site, ST, K_locus [21]
## Region Study Site ST K_locus n
## <chr> <chr> <dbl> <chr> <chr> <int>
## 1 Eastern Africa Kilifi 50 ST17 KL25 15
## 2 Southern Africa GBS 45 ST17 KL25 9
## 3 Southern Africa Botswana 59 ST17 KL25 8
## 4 Southern Africa BabyGERMS 33 ST17 KL25 7
## 5 Southern Africa BabyGERMS 32 ST17 KL25 4
## 6 Eastern Africa Kilifi 50 ST336 KL25 3
## 7 Eastern Africa MLW 36 ST17 KL25 3
## 8 Southern Africa BabyGERMS 31 ST17 KL25 3
## 9 Southern Asia AKU 11 ST17 KL25 3
## 10 Southern Africa BabyGERMS 30 ST17 KL25 2
## # ℹ 11 more rows
data_NNS_sites10 %>% filter(O_genotype=="O5" & Region=="Southern Africa") %>% group_by(ST, K_locus) %>% count() %>% arrange(-n)
## # A tibble: 3 × 3
## # Groups: ST, K_locus [3]
## ST K_locus n
## <chr> <chr> <int>
## 1 ST17 KL25 37
## 2 ST2621 KL25 1
## 3 ST3184 KL25 1
data_NNS_sites10 %>% filter(O_genotype=="O5" & Region=="Southern Africa") %>% group_by(ST, K_locus, Site) %>% count() %>% arrange(-n)
## # A tibble: 10 × 4
## # Groups: ST, K_locus, Site [10]
## ST K_locus Site n
## <chr> <chr> <dbl> <int>
## 1 ST17 KL25 45 9
## 2 ST17 KL25 59 8
## 3 ST17 KL25 33 7
## 4 ST17 KL25 32 4
## 5 ST17 KL25 31 3
## 6 ST17 KL25 30 2
## 7 ST17 KL25 34 2
## 8 ST17 KL25 35 2
## 9 ST2621 KL25 33 1
## 10 ST3184 KL25 35 1
Global and regional coverage - top20 global - Fig3bi
kbayes_raw_coverage <- getCumulativeCoverageGlobalRegional(kbayes_raw, kbayes$locus_rank) %>%
mutate(subgroup=fct_relevel(subgroup,c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa")))
coverage_globalTop20_globalRegional <- plotCoverage(kbayes_raw_coverage, kbayes$locus_rank, maxRank=20, cols=region_cols)

coverage_globalTop20_globalRegional

fatal - read posterior estimates for adjusted counts (main) and raw
counts (for comparison)
# read raw Bayesian estimates for K - using fatal samples from sites with at least 10 fatal
kbayes_fatal_raw_min10perSite <- parseModelledEstimates(global_post="outputs_core/K_Fatal_min10_raw_28_posterior_global.csv.gz", region_post="outputs_core/K_Fatal_min10_raw_28_posterior_subgroup.csv.gz")
## Rows: 576000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 2304000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
kbayes_fatal_raw_min10perSite_coverage <- getCumulativeCoverage(kbayes$locus_rank, kbayes_fatal_raw_min10perSite$global_post) %>% mutate(type="min10")
ESBL - read posterior estimates for adjusted counts (main) and raw
counts (for comparison)
kbayes_esbl_raw_min10perSite <- parseModelledEstimates(global_post="outputs_core/K_ESBL_min10_raw_28_posterior_global.csv.gz", region_post="outputs_core/K_ESBL_min10_raw_28_posterior_subgroup.csv.gz")
## Rows: 960000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3840000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
kbayes_esbl_raw_min10perSite_coverage <- getCumulativeCoverage(kbayes$locus_rank, kbayes_esbl_raw_min10perSite$global_post) %>% mutate(type="min10")
CP - read posterior estimates for adjusted counts (main) and raw
counts (for comparison)
kbayes_cp_raw_min10perSite <- parseModelledEstimates(global_post="outputs_core/K_Carba_min10_raw_28_posterior_global.csv.gz", region_post="outputs_core/K_Carba_min10_raw_28_posterior_subgroup.csv.gz")
## Rows: 588000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1764000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
kbayes_cp_raw_min10perSite_coverage <- getCumulativeCoverage(kbayes$locus_rank, kbayes_cp_raw_min10perSite$global_post) %>% mutate(type="min10")
combine coverage estimates for subgroups (fatal, ESBL, CP) -
Fig3ai
kbayes_raw_coverage_subgroups <- kbayes_esbl_raw_min10perSite_coverage %>% mutate(subgroup="ESBL") %>%
bind_rows(kbayes_cp_raw_min10perSite_coverage %>% mutate(subgroup="CP")) %>%
bind_rows(kbayes_fatal_raw_min10perSite_coverage %>% mutate(subgroup="Fatal")) %>%
bind_rows(kbayes_raw_coverage %>% filter(subgroup=="Global") %>% mutate(subgroup="All") ) %>%
mutate(subgroup=fct_relevel(subgroup,c("All", "Fatal", "ESBL", "CP")))
coverage_globalTop20_subgroups <- plotCoverage(kbayes_raw_coverage_subgroups, kbayes$locus_rank, maxRank=20, cols=group_cols)

coverage_globalTop20_subgroups

take top 8 per region - Fig3aii and Fig3bii
## extract cluster-adjusted coverage per region
K_rank_SA <- kbayes$region_est %>% ungroup %>% filter(subgroup=="Southern Africa") %>% arrange(-mean) %>% mutate(rank=row_number())
K_rank_WA <- kbayes$region_est %>% ungroup %>% filter(subgroup=="Western Africa") %>% arrange(-mean) %>% mutate(rank=row_number())
K_rank_EA <- kbayes$region_est %>% ungroup %>% filter(subgroup=="Eastern Africa") %>% arrange(-mean) %>% mutate(rank=row_number())
K_rank_SAs <- kbayes$region_est %>% ungroup %>% filter(subgroup=="Southern Asia") %>% arrange(-mean) %>% mutate(rank=row_number())
# get top-8 within each region - gives 21 loci
n<-8
topN <- bind_rows(K_rank_SA, K_rank_EA) %>% bind_rows(K_rank_WA) %>% bind_rows(K_rank_SAs) %>% filter(rank<=n) %>% pull(locus) %>% unique()
length(unique(topN))
## [1] 21
kbayes$locus_rank[kbayes$locus_rank$locus %in% topN,]
## # A tibble: 21 × 2
## locus rank
## <chr> <int>
## 1 KL2 1
## 2 KL102 2
## 3 KL25 3
## 4 KL15 4
## 5 KL62 5
## 6 KL30 6
## 7 KL10 7
## 8 KL17 8
## 9 KL23 9
## 10 KL51 10
## # ℹ 11 more rows
# remove KL8, as this only benefits Southern Africa which already exceeds 80% without KL9, and brings us back to 20 loci
k_rank_region8 <- kbayes$locus_rank[kbayes$locus_rank$locus %in% topN,] %>%
filter(locus!="KL9") %>%
mutate(rank=1:(length(unique(topN))-1))
# calculate global & regional coverage for these K - Fig 3bii
kbayes_raw_coverage_top8perRegion <- getCumulativeCoverageGlobalRegional(kbayes_raw, k_rank_region8) %>%
mutate(subgroup=fct_relevel(subgroup,c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa")))
coverage_global_top8perRegion <- plotCoverage(kbayes_raw_coverage_top8perRegion, k_rank_region8, maxRank=nrow(k_rank_region8), cols=region_cols)

coverage_global_top8perRegion

# calculate subgroup coverage for these K - Fig 3aii
kbayes_fatal_raw_min10perSite_coverage_top8perRegion <- getCumulativeCoverage(k_rank_region8, kbayes_fatal_raw_min10perSite$global_post)
kbayes_esbl_raw_min10perSite_coverage_top8perRegion <- getCumulativeCoverage(k_rank_region8, kbayes_esbl_raw_min10perSite$global_post)
kbayes_cp_raw_min10perSite_coverage_top8perRegion <- getCumulativeCoverage(k_rank_region8, kbayes_cp_raw_min10perSite$global_post)
kbayes_raw_coverage_subgroups_top8perRegion <- kbayes_esbl_raw_min10perSite_coverage_top8perRegion %>% mutate(subgroup="ESBL") %>%
bind_rows(kbayes_cp_raw_min10perSite_coverage_top8perRegion %>% mutate(subgroup="CP")) %>%
bind_rows(kbayes_fatal_raw_min10perSite_coverage_top8perRegion %>% mutate(subgroup="Fatal")) %>%
bind_rows(kbayes_raw_coverage_top8perRegion %>% filter(subgroup=="Global") %>% mutate(subgroup="All") ) %>%
mutate(subgroup=fct_relevel(subgroup,c("All", "Fatal", "ESBL", "CP")))
coverage_subgroups_top8perRegion <- plotCoverage(kbayes_raw_coverage_subgroups_top8perRegion, k_rank_region8, maxRank=nrow(k_rank_region8), cols=group_cols)

coverage_subgroups_top8perRegion

(coverage_globalTop20_subgroups + ggtitle("a) Global coverage, by case type", subtitle="KL set 1: global top-20") + coverage_subgroups_top8perRegion + ggtitle("", subtitle="KL set 2: top-8 per region") + patchwork::plot_layout(guides="collect", axes="collect")) / (coverage_globalTop20_globalRegional + ggtitle("b) Per-region coverage", subtitle="KL set 1: global top-20") + coverage_global_top8perRegion + ggtitle("", subtitle="KL set 2: top-8 per region") + patchwork::plot_layout(guides="collect", axes="collect"))

ggsave("figures/Fig3_Kcoverage_raw.pdf", width=8, height=8)
ggsave("figures/Fig3_Kcoverage_raw.png", width=8, height=8)
Table 2 - coverage stats for all/ESBL/CP/fatal, for global and
regional
kbayes_fatal_raw_min10perSite_coverageRegional <- getCumulativeCoverageGlobalRegional(ranks=kbayes$locus_rank, bayes=kbayes_fatal_raw_min10perSite, maxRank=20)
kbayes_esbl_raw_min10perSite_coverageRegional <- getCumulativeCoverageGlobalRegional(ranks=kbayes$locus_rank, bayes=kbayes_esbl_raw_min10perSite, maxRank=20)
kbayes_cp_raw_min10perSite_coverageRegional <- getCumulativeCoverageGlobalRegional(ranks=kbayes$locus_rank, bayes=kbayes_cp_raw_min10perSite, maxRank=20)
table2 <- kbayes_raw_coverage %>% mutate(cases="All") %>%
bind_rows(kbayes_fatal_raw_min10perSite_coverageRegional %>% mutate(cases=" Fatal infections")) %>%
bind_rows(kbayes_esbl_raw_min10perSite_coverageRegional %>% mutate(cases=" ESBL infections")) %>%
bind_rows(kbayes_cp_raw_min10perSite_coverageRegional %>% mutate(cases=" CP infections")) %>%
filter(rank %in% c(5,10,15,20)) %>%
mutate(upper=if_else(upper>1, 1, upper)) %>%
mutate(summary=paste0(round(mean,3)*100," (",round(lower,3)*100,"-",round(upper,3)*100,")")) %>%
select(rank, subgroup, cases, summary) %>%
pivot_wider(names_from=rank, values_from=summary, id_cols=c(subgroup, cases)) %>%
rename(Region=subgroup)
add totals per subgroup
fatal_counts <- data_NNS_sites10 %>% filter(Mortality==1) %>% group_by(Region) %>% count()
fatal_counts <- fatal_counts %>% bind_rows(list(Region="Global", n=sum(fatal_counts$n)))
esbl_counts <- data_NNS_sites10 %>% filter(resistance_score>=1) %>% group_by(Region) %>% count()
esbl_counts <- esbl_counts %>% bind_rows(list(Region="Global", n=sum(esbl_counts$n)))
cp_counts <- data_NNS_sites10 %>% filter(resistance_score>=2) %>% group_by(Region) %>% count()
cp_counts <- cp_counts %>% bind_rows(list(Region="Global", n=sum(cp_counts$n)))
total_counts <- data_NNS_sites10 %>% group_by(Region) %>% count()
total_counts <- total_counts %>% bind_rows(list(Region="Global", n=sum(cp_counts$n)))
table2 <- esbl_counts %>% mutate(cases=" ESBL infections") %>%
bind_rows(cp_counts%>% mutate(cases=" CP infections")) %>%
bind_rows(fatal_counts %>% mutate(cases=" Fatal infections")) %>%
bind_rows(total_counts %>% mutate(cases="All")) %>%
full_join(table2, by=c("Region", "cases")) %>%
mutate(cases=fct_relevel(cases,c("All", " Fatal infections", " ESBL infections", " CP infections")))
# sort function isn't working when ordering region factor using fct_relevel
table2$Region <- factor(table2$Region, levels = c("Global", "Eastern Africa", "Southern Africa", "Western Africa", "Southern Asia"))
table2 <- table2 %>%
arrange(Region, cases) %>%
relocate(n, .after=cases) %>%
mutate(cases=if_else(cases=="All", Region, cases)) %>%
rename_with(~ paste0(.x, " KL"), c("5", "10", "15", "20")) %>%
rename(N=n) %>%
rename(Data=cases) %>%
ungroup %>% select(-Region)
write_tsv(table2, file="tables/Table2_coverage.tsv")
Table S4 - K set2 coverage stats for all/ESBL/CP/fatal, for global
and regional
kbayes_fatal_raw_min10perSite_coverageRegional_top8perRegion <- getCumulativeCoverageGlobalRegional(ranks=k_rank_region8, bayes=kbayes_fatal_raw_min10perSite, maxRank=20)
kbayes_esbl_raw_min10perSite_coverageRegional_top8perRegion <- getCumulativeCoverageGlobalRegional(ranks=k_rank_region8, bayes=kbayes_esbl_raw_min10perSite, maxRank=20)
kbayes_cp_raw_min10perSite_coverageRegional_top8perRegion <- getCumulativeCoverageGlobalRegional(ranks=k_rank_region8, bayes=kbayes_cp_raw_min10perSite, maxRank=20)
tableS4 <- kbayes_raw_coverage_top8perRegion %>% mutate(cases="All") %>%
bind_rows(kbayes_fatal_raw_min10perSite_coverageRegional_top8perRegion %>% mutate(cases=" Fatal infections")) %>%
bind_rows(kbayes_esbl_raw_min10perSite_coverageRegional_top8perRegion %>% mutate(cases=" ESBL infections")) %>%
bind_rows(kbayes_cp_raw_min10perSite_coverageRegional_top8perRegion %>% mutate(cases=" CP infections")) %>%
mutate(upper=if_else(upper>1, 1, upper)) %>%
filter(rank %in% c(5,10,15,20)) %>%
mutate(summary=paste0(round(mean,3)*100," (",round(lower,3)*100,"-",round(upper,3)*100,")")) %>%
select(rank, subgroup, cases, summary) %>%
pivot_wider(names_from=rank, values_from=summary, id_cols=c(subgroup, cases)) %>%
rename(Region=subgroup)
tableS4 <- esbl_counts %>% mutate(cases=" ESBL infections") %>%
bind_rows(cp_counts%>% mutate(cases=" CP infections")) %>%
bind_rows(fatal_counts %>% mutate(cases=" Fatal infections")) %>%
bind_rows(total_counts %>% mutate(cases="All")) %>%
full_join(tableS4, by=c("Region", "cases")) %>%
mutate(cases=fct_relevel(cases,c("All", " Fatal infections", " ESBL infections", " CP infections")))
# sort function isn't working when ordering region factor using fct_relevel
tableS4$Region <- factor(tableS4$Region, levels = c("Global", "Eastern Africa", "Southern Africa", "Western Africa", "Southern Asia"))
tableS4 <- tableS4 %>%
arrange(Region, cases) %>%
relocate(n, .after=cases) %>%
mutate(cases=if_else(cases=="All", Region, cases)) %>%
rename_with(~ paste0(.x, " KL"), c("5", "10", "15", "20")) %>%
rename(N=n) %>%
rename(Data=cases) %>%
ungroup %>% select(-Region)
write_tsv(tableS4, file="tables/TableS4_coverageSet2.tsv")
Fig S4 - coverage for region-focused K sets
# ranked by Southern Africa
kbayes_raw_coverage_top20SA <- getCumulativeCoverageGlobalRegional(kbayes_raw, K_rank_SA %>% select(locus, rank), maxRank=20) %>%
mutate(subgroup=fct_relevel(subgroup,c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa")))
kbayes_raw_coverage_globalRegional_top20SA <- plotCoverage(kbayes_raw_coverage_top20SA, K_rank_SA %>% select(locus, rank), maxRank=20, cols=region_cols)

# ranked by Eastern Africa
kbayes_raw_coverage_top20EA <- getCumulativeCoverageGlobalRegional(kbayes_raw, K_rank_EA %>% select(locus, rank), maxRank=20) %>%
mutate(subgroup=fct_relevel(subgroup,c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa")))
kbayes_raw_coverage_globalRegional_top20EA <- plotCoverage(kbayes_raw_coverage_top20EA, K_rank_EA %>% select(locus, rank), maxRank=20, cols=region_cols)

# ranked by Western Africa
kbayes_raw_coverage_top20WA <- getCumulativeCoverageGlobalRegional(kbayes_raw, K_rank_WA %>% select(locus, rank), maxRank=20) %>%
mutate(subgroup=fct_relevel(subgroup,c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa")))
kbayes_raw_coverage_globalRegional_top20WA <- plotCoverage(kbayes_raw_coverage_top20WA, K_rank_WA %>% select(locus, rank), maxRank=20, cols=region_cols)

# ranked by Southern Asia
kbayes_raw_coverage_top20SAs <- getCumulativeCoverageGlobalRegional(kbayes_raw, K_rank_SAs %>% select(locus, rank), maxRank=20) %>%
mutate(subgroup=fct_relevel(subgroup,c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa")))
kbayes_raw_coverage_globalRegional_top20SAs <- plotCoverage(kbayes_raw_coverage_top20SAs, K_rank_SAs %>% select(locus, rank), maxRank=20, cols=region_cols)

(kbayes_raw_coverage_globalRegional_top20SA + ggtitle("Top-20 K loci in S. Africa") + kbayes_raw_coverage_globalRegional_top20EA + ggtitle("Top-20 K loci in E. Africa")) / (kbayes_raw_coverage_globalRegional_top20WA + ggtitle("Top-20 K loci in W. Africa") + kbayes_raw_coverage_globalRegional_top20SAs + ggtitle("Top-20 K loci in S. Asia")) + patchwork::plot_layout(guides="collect")

ggsave("figures/FigS4_KcoverageRaw_top20PerRegion.pdf", width=8, height=8)
ggsave("figures/FigS4_KcoverageRaw_top20PerRegion.png", width=8, height=8)
numbers for text - K coverage
# coverage of top-20
kbayes_raw_coverage %>% filter(rank %in% c(5,10,15,20) & subgroup=="Global") %>% select(mean, lower, upper)
## # A tibble: 4 × 3
## mean lower upper
## <dbl> <dbl> <dbl>
## 1 0.488 0.460 0.518
## 2 0.588 0.557 0.621
## 3 0.671 0.637 0.706
## 4 0.736 0.700 0.773
kbayes_raw_coverage %>% filter(rank==10 & subgroup=="Global") %>% select(mean, lower, upper) - (kbayes_raw_coverage %>% filter(rank==5 & subgroup=="Global") %>% select(mean, lower, upper))
## mean lower upper
## 1 0.09971156 0.09681222 0.102871
kbayes_raw_coverage %>% filter(rank==15 & subgroup=="Global") %>% select(mean, lower, upper) - (kbayes_raw_coverage %>% filter(rank==10 & subgroup=="Global") %>% select(mean, lower, upper))
## mean lower upper
## 1 0.0831687 0.0806286 0.08563087
kbayes_raw_coverage %>% filter(rank==20 & subgroup=="Global") %>% select(mean, lower, upper) - (kbayes_raw_coverage %>% filter(rank==15 & subgroup=="Global") %>% select(mean, lower, upper))
## mean lower upper
## 1 0.0645007 0.06241249 0.06650469
# available data on fatal cases
data_NNS_sites10 %>% filter(!is.na(Mortality)) %>% nrow()
## [1] 1053
data_NNS_sites10 %>% filter(Mortality==1) %>% nrow()
## [1] 390
# coverage of region-focused subsets
kbayes_raw_coverage_globalRegional_top20SA$data %>% filter(rank==20)
## # A tibble: 5 × 7
## rank mean median lower upper locus subgroup
## <int> <dbl> <dbl> <dbl> <dbl> <chr> <fct>
## 1 20 0.706 0.706 0.671 0.741 KL19 Global
## 2 20 0.727 0.727 0.680 0.773 KL19 Eastern Africa
## 3 20 0.591 0.591 0.529 0.657 KL19 Southern Asia
## 4 20 0.909 0.908 0.798 1 KL19 Southern Africa
## 5 20 0.500 0.497 0.362 0.656 KL19 Western Africa
kbayes_raw_coverage_globalRegional_top20EA$data %>% filter(rank==20)
## # A tibble: 5 × 7
## rank mean median lower upper locus subgroup
## <int> <dbl> <dbl> <dbl> <dbl> <chr> <fct>
## 1 20 0.716 0.715 0.680 0.751 KL38 Global
## 2 20 0.808 0.807 0.758 0.858 KL38 Eastern Africa
## 3 20 0.577 0.577 0.516 0.642 KL38 Southern Asia
## 4 20 0.614 0.613 0.525 0.708 KL38 Southern Africa
## 5 20 0.603 0.600 0.452 0.774 KL38 Western Africa
kbayes_raw_coverage_globalRegional_top20WA$data %>% filter(rank==20)
## # A tibble: 5 × 7
## rank mean median lower upper locus subgroup
## <int> <dbl> <dbl> <dbl> <dbl> <chr> <fct>
## 1 20 0.699 0.699 0.664 0.735 KL30 Global
## 2 20 0.788 0.788 0.740 0.838 KL30 Eastern Africa
## 3 20 0.527 0.526 0.468 0.588 KL30 Southern Asia
## 4 20 0.612 0.610 0.523 0.706 KL30 Southern Africa
## 5 20 0.799 0.796 0.621 0.996 KL30 Western Africa
kbayes_raw_coverage_globalRegional_top20SAs$data %>% filter(rank==20)
## # A tibble: 5 × 7
## rank mean median lower upper locus subgroup
## <int> <dbl> <dbl> <dbl> <dbl> <chr> <fct>
## 1 20 0.705 0.705 0.669 0.741 KL122 Global
## 2 20 0.696 0.696 0.650 0.742 KL122 Eastern Africa
## 3 20 0.802 0.800 0.728 0.878 KL122 Southern Asia
## 4 20 0.624 0.623 0.534 0.721 KL122 Southern Africa
## 5 20 0.474 0.471 0.340 0.627 KL122 Western Africa
# coverage of KL set 2
kbayes_raw_coverage_top8perRegion%>% filter(rank==20)
## # A tibble: 5 × 7
## rank mean median lower upper locus subgroup
## <int> <dbl> <dbl> <dbl> <dbl> <chr> <fct>
## 1 20 0.722 0.722 0.686 0.759 KL53 Global
## 2 20 0.704 0.703 0.658 0.750 KL53 Eastern Africa
## 3 20 0.724 0.723 0.654 0.795 KL53 Southern Asia
## 4 20 0.810 0.809 0.706 0.919 KL53 Southern Africa
## 5 20 0.698 0.695 0.536 0.882 KL53 Western Africa
kbayes_raw_coverage_subgroups_top8perRegion %>% filter(rank==20)
## # A tibble: 4 × 7
## rank mean median lower upper locus subgroup
## <int> <dbl> <dbl> <dbl> <dbl> <chr> <fct>
## 1 20 0.739 0.739 0.702 0.776 KL53 ESBL
## 2 20 0.809 0.808 0.736 0.884 KL53 CP
## 3 20 0.690 0.689 0.614 0.770 KL53 Fatal
## 4 20 0.722 0.722 0.686 0.759 KL53 All
# what is extra in set2 that is not in top-20?
k_rank_region8 %>% left_join(kbayes$locus, by="locus") %>% filter(rank.y>20)
## # A tibble: 5 × 3
## locus rank.x rank.y
## <chr> <int> <int>
## 1 KL28 16 22
## 2 KL81 17 25
## 3 KL8 18 27
## 4 KL116 19 35
## 5 KL53 20 41
Global and regional O coverage - top10 global - Fig4d
obayes_raw_coverage <- getCumulativeCoverageGlobalRegional(obayes_raw, obayes$locus_rank) %>%
mutate(subgroup=fct_relevel(subgroup,c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa")))
ocoverage_globalTop10_globalRegional <- plotCoverage(obayes_raw_coverage, obayes$locus_rank, maxRank=10, xintercept=c(5,10), cols=region_cols, col_title = "d) Region")

ocoverage_globalTop10_globalRegional

O fatal - read posterior estimates for adjusted counts (main) and
raw counts (for comparison)
obayes_fatal_raw_min10perSite <- parseModelledEstimates(global_post="outputs_core/O_Fatal_min10_raw_28_posterior_global.csv.gz", region_post="outputs_core/O_Fatal_min10_raw_28_posterior_subgroup.csv.gz", fixOnames = T)
## Rows: 132000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 528000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
obayes_fatal_raw_min10perSite_coverage <- getCumulativeCoverage(obayes$locus_rank, obayes_fatal_raw_min10perSite$global_post) %>% mutate(type="min10")
ESBL - read posterior estimates for adjusted counts (main) and raw
counts (for comparison)
obayes_esbl_raw_min10perSite <- parseModelledEstimates(global_post="outputs_core/O_ESBL_min10_raw_28_posterior_global.csv.gz", region_post="outputs_core/O_ESBL_min10_raw_28_posterior_subgroup.csv.gz", fixOnames = T)
## Rows: 156000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 624000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
obayes_esbl_raw_min10perSite_coverage <- getCumulativeCoverage(obayes$locus_rank, obayes_esbl_raw_min10perSite$global_post) %>% mutate(type="min10")
CP - read posterior estimates for adjusted counts (main) and raw
counts (for comparison)
obayes_cp_raw_min10perSite <- parseModelledEstimates(global_post="outputs_core/O_Carba_min10_raw_28_posterior_global.csv.gz", region_post="outputs_core/O_Carba_min10_raw_28_posterior_subgroup.csv.gz", fixOnames = T)
## Rows: 120000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 360000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
obayes_cp_raw_min10perSite_coverage <- getCumulativeCoverage(obayes$locus_rank, obayes_cp_raw_min10perSite$global_post) %>% mutate(type="min10")
combine coverage estimates for subgroups (fatal, ESBL, CP) -
Fig4c
obayes_raw_coverage_subgroups <- obayes_esbl_raw_min10perSite_coverage %>% mutate(subgroup="ESBL") %>%
bind_rows(obayes_cp_raw_min10perSite_coverage %>% mutate(subgroup="CP")) %>%
bind_rows(obayes_fatal_raw_min10perSite_coverage %>% mutate(subgroup="Fatal")) %>%
bind_rows(obayes_raw_coverage %>% filter(subgroup=="Global") %>% mutate(subgroup="All") ) %>%
mutate(subgroup=fct_relevel(subgroup,c("All", "Fatal", "ESBL", "CP")))
o_coverage_globalTop10_subgroups <- plotCoverage(obayes_raw_coverage_subgroups, obayes$locus_rank, maxRank=10, xintercept=c(5,10), cols=group_cols, col_title = "c) Subgroup")

o_coverage_globalTop10_subgroups

(o_prev_global_dist + o_prev_region_heatmap) / (o_coverage_globalTop10_subgroups + ggtitle("c) Global coverage") + ocoverage_globalTop10_globalRegional + ggtitle("d) Regional coverage") + patchwork::plot_layout(guides="collect"))
## Picking joint bandwidth of 0.0913
## Warning: Removed 118 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

ggsave("figures/Fig4_O_prevAdj_coverageRaw.pdf", width=8, height=8)
## Picking joint bandwidth of 0.0913
## Warning: Removed 118 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
ggsave("figures/Fig4_O_prevAdj_coverageRaw.png", width=8, height=8)
## Picking joint bandwidth of 0.0913
## Warning: Removed 118 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
numbers for text - O prevalence and coverage
# O prevalence
obayes$global_est
## # A tibble: 15 × 6
## locus median mean lower upper rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 O1.v1 0.263 0.264 0.234 0.294 1
## 2 O1.v2 0.219 0.219 0.191 0.249 2
## 3 O2afg 0.159 0.159 0.134 0.185 3
## 4 O4 0.0853 0.0856 0.0673 0.106 4
## 5 O2a 0.0641 0.0645 0.0489 0.0824 5
## 6 O3b 0.0576 0.0581 0.0429 0.0760 6
## 7 O5 0.0563 0.0568 0.0419 0.0737 7
## 8 OL13 0.0501 0.0506 0.0365 0.0669 8
## 9 O3/O3a 0.0299 0.0303 0.0197 0.0430 9
## 10 OL104 0.00340 0.00380 0.000794 0.00918 10
## 11 OL103 0.00212 0.00253 0.000327 0.00694 11
## 12 O12 0.00211 0.00250 0.000323 0.00690 12
## 13 OL102 0.000873 0.00127 0.0000343 0.00464 13
## 14 O2a.v3 0.000868 0.00127 0.0000351 0.00477 14
## 15 O1.v3 0.000873 0.00126 0.0000291 0.00468 15
obayes$region_est
## # A tibble: 60 × 6
## # Groups: locus [15]
## locus subgroup median mean lower upper
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 O1.v1 Eastern Africa 0.284 0.285 0.240 0.332
## 2 O1.v1 Southern Africa 0.261 0.263 0.199 0.332
## 3 O1.v1 Southern Asia 0.224 0.225 0.175 0.278
## 4 O1.v1 Western Africa 0.294 0.297 0.196 0.415
## 5 O1.v2 Eastern Africa 0.228 0.228 0.187 0.272
## 6 O1.v2 Southern Africa 0.221 0.222 0.163 0.288
## 7 O1.v2 Southern Asia 0.193 0.195 0.148 0.248
## 8 O1.v2 Western Africa 0.257 0.260 0.162 0.371
## 9 O1.v3 Eastern Africa 0.000836 0.00131 0.0000268 0.00531
## 10 O1.v3 Southern Africa 0.000567 0.00103 0.0000166 0.00480
## # ℹ 50 more rows
# coverage
obayes_raw_coverage
## # A tibble: 150 × 7
## rank mean median lower upper locus subgroup
## <int> <dbl> <dbl> <dbl> <dbl> <chr> <fct>
## 1 1 0.253 0.252 0.233 0.273 O1.v1 Global
## 2 2 0.433 0.433 0.407 0.460 O1.v2 Global
## 3 3 0.682 0.682 0.650 0.715 O2afg Global
## 4 4 0.804 0.804 0.769 0.840 O4 Global
## 5 5 0.855 0.855 0.819 0.893 O2a Global
## 6 6 0.883 0.883 0.845 0.920 O3b Global
## 7 7 0.921 0.921 0.883 0.959 O5 Global
## 8 8 0.969 0.969 0.930 1.01 OL13 Global
## 9 9 0.990 0.990 0.951 1.03 O3/O3a Global
## 10 10 0.992 0.992 0.952 1.03 OL104 Global
## # ℹ 140 more rows
# STs with O4 or O2a in Southern Asia
data_NNS_sites10 %>% filter(Region=="Southern Asia" & O_genotype=="O4") %>% group_by(ST, K_locus)%>% count()
## # A tibble: 16 × 3
## # Groups: ST, K_locus [16]
## ST K_locus n
## <chr> <chr> <int>
## 1 ST11 KL15 87
## 2 ST152 KL149 3
## 3 ST2258 KL131 2
## 4 ST231 KL144 1
## 5 ST273 KL141 1
## 6 ST273 KL15 3
## 7 ST340 KL15 5
## 8 ST3623 KL15 1
## 9 ST37 KL15 2
## 10 ST392 KL27 1
## 11 ST429 KL27 1
## 12 ST437 KL36 9
## 13 ST502 KL15 15
## 14 ST70 KL15 1
## 15 ST726 KL15 1
## 16 ST883-1LV KL15 1
data_NNS_sites10 %>% filter(Region=="Southern Asia" & O_genotype=="O4") %>% group_by(ST, K_locus, Site)%>% count()
## # A tibble: 26 × 4
## # Groups: ST, K_locus, Site [26]
## ST K_locus Site n
## <chr> <chr> <dbl> <int>
## 1 ST11 KL15 2 6
## 2 ST11 KL15 5 14
## 3 ST11 KL15 38 1
## 4 ST11 KL15 40 59
## 5 ST11 KL15 41 7
## 6 ST152 KL149 40 3
## 7 ST2258 KL131 38 1
## 8 ST2258 KL131 41 1
## 9 ST231 KL144 11 1
## 10 ST273 KL141 40 1
## # ℹ 16 more rows
data_NNS_sites10 %>% filter(Region=="Southern Asia" & O_genotype=="O2a") %>% group_by(ST, K_locus)%>% count()
## # A tibble: 9 × 3
## # Groups: ST, K_locus [9]
## ST K_locus n
## <chr> <chr> <int>
## 1 ST11 KL24 7
## 2 ST1306 KL146 1
## 3 ST147 KL48 1
## 4 ST147 KL64 6
## 5 ST16 KL48 6
## 6 ST268-1LV KL20 1
## 7 ST2805 KL110 2
## 8 ST441-1LV KL62 1
## 9 ST45 KL24 6
data_NNS_sites10 %>% filter(Region=="Southern Asia" & O_genotype=="O2a") %>% group_by(ST, K_locus, Site)%>% count()
## # A tibble: 17 × 4
## # Groups: ST, K_locus, Site [17]
## ST K_locus Site n
## <chr> <chr> <dbl> <int>
## 1 ST11 KL24 6 4
## 2 ST11 KL24 26 1
## 3 ST11 KL24 38 2
## 4 ST1306 KL146 15 1
## 5 ST147 KL48 40 1
## 6 ST147 KL64 10 2
## 7 ST147 KL64 11 1
## 8 ST147 KL64 15 1
## 9 ST147 KL64 40 1
## 10 ST147 KL64 41 1
## 11 ST16 KL48 5 2
## 12 ST16 KL48 40 4
## 13 ST268-1LV KL20 6 1
## 14 ST2805 KL110 38 2
## 15 ST441-1LV KL62 41 1
## 16 ST45 KL24 2 5
## 17 ST45 KL24 15 1
data_NNS_sites10 %>% filter(Region=="Southern Africa" & O_genotype=="O5") %>% group_by(ST, K_locus)%>% count()
## # A tibble: 3 × 3
## # Groups: ST, K_locus [3]
## ST K_locus n
## <chr> <chr> <int>
## 1 ST17 KL25 37
## 2 ST2621 KL25 1
## 3 ST3184 KL25 1
data_NNS_sites10 %>% filter(Region=="Southern Africa" & O_genotype=="O5") %>% group_by(ST, K_locus, Site, Country)%>% count()
## # A tibble: 10 × 5
## # Groups: ST, K_locus, Site, Country [10]
## ST K_locus Site Country n
## <chr> <chr> <dbl> <chr> <int>
## 1 ST17 KL25 30 South Africa 2
## 2 ST17 KL25 31 South Africa 3
## 3 ST17 KL25 32 South Africa 4
## 4 ST17 KL25 33 South Africa 7
## 5 ST17 KL25 34 South Africa 2
## 6 ST17 KL25 35 South Africa 2
## 7 ST17 KL25 45 South Africa 9
## 8 ST17 KL25 59 Botswana 8
## 9 ST2621 KL25 33 South Africa 1
## 10 ST3184 KL25 35 South Africa 1
# top-5 coverage
obayes_raw_coverage %>% filter(rank==5)
## # A tibble: 5 × 7
## rank mean median lower upper locus subgroup
## <int> <dbl> <dbl> <dbl> <dbl> <chr> <fct>
## 1 5 0.855 0.855 0.819 0.893 O2a Global
## 2 5 0.919 0.919 0.871 0.968 O2a Eastern Africa
## 3 5 0.757 0.757 0.688 0.829 O2a Southern Asia
## 4 5 0.833 0.830 0.664 1.02 O2a Western Africa
## 5 5 0.774 0.773 0.676 0.874 O2a Southern Africa
obayes_raw_coverage_subgroups %>% filter(rank==5)
## # A tibble: 4 × 8
## rank mean median lower upper locus type subgroup
## <int> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <fct>
## 1 5 0.865 0.865 0.828 0.903 O2a min10 ESBL
## 2 5 0.774 0.774 0.704 0.844 O2a min10 CP
## 3 5 0.897 0.896 0.813 0.985 O2a min10 Fatal
## 4 5 0.855 0.855 0.819 0.893 O2a <NA> All
Table S5 - O coverage stats for all/ESBL/CP/fatal, for global and
regional
obayes_fatal_raw_min10perSite_coverageRegional <- getCumulativeCoverageGlobalRegional(ranks=obayes$locus_rank, bayes=obayes_fatal_raw_min10perSite, maxRank=10)
obayes_esbl_raw_min10perSite_coverageRegional <- getCumulativeCoverageGlobalRegional(ranks=obayes$locus_rank, bayes=obayes_esbl_raw_min10perSite, maxRank=10)
obayes_cp_raw_min10perSite_coverageRegional <- getCumulativeCoverageGlobalRegional(ranks=obayes$locus_rank, bayes=obayes_cp_raw_min10perSite, maxRank=10)
tableS5 <- obayes_raw_coverage %>% filter(rank <= 10) %>% mutate(cases="All") %>%
bind_rows(obayes_fatal_raw_min10perSite_coverageRegional %>% mutate(cases=" Fatal infections")) %>%
bind_rows(obayes_esbl_raw_min10perSite_coverageRegional %>% mutate(cases=" ESBL infections")) %>%
bind_rows(obayes_cp_raw_min10perSite_coverageRegional %>% mutate(cases=" CP infections")) %>%
mutate(upper=if_else(upper>1, 1, upper)) %>%
mutate(summary=paste0(round(mean,3)*100," (",round(lower,3)*100,"-",round(upper,3)*100,")")) %>%
select(locus, subgroup, cases, summary) %>%
pivot_wider(names_from=locus, values_from=summary, id_cols=c(subgroup, cases)) %>%
rename(Region=subgroup)
add totals per subgroup
tableS5 <- esbl_counts %>% mutate(cases=" ESBL infections") %>%
bind_rows(cp_counts %>% mutate(cases=" CP infections")) %>%
bind_rows(fatal_counts %>% mutate(cases=" Fatal infections")) %>%
bind_rows(total_counts %>% mutate(cases="All")) %>%
full_join(tableS5, by=c("Region", "cases")) %>%
mutate(cases=fct_relevel(cases,c("All", " Fatal infections", " ESBL infections", " CP infections")))
# sort function isn't working when ordering region factor using fct_relevel
tableS5$Region <- factor(tableS5$Region, levels = c("Global", "Eastern Africa", "Southern Africa", "Western Africa", "Southern Asia"))
tableS5 <- tableS5 %>%
arrange(Region, cases) %>%
relocate(n, .after=cases) %>%
mutate(cases=if_else(cases=="All", Region, cases)) %>%
rename(N=n) %>%
rename(Data=cases) %>%
ungroup %>% select(-Region)
write_tsv(tableS5, file="tables/TableS5_coverage.tsv")
numbers for text - effect of cluster adjustment
kbayes_global %>% mutate(diff=mean.raw-mean.adj) %>% select(locus, mean.raw, mean.adj, diff, rank.raw, rank.adj) %>% arrange(-diff)
## # A tibble: 90 × 6
## locus mean.raw mean.adj diff rank.raw rank.adj
## <chr> <dbl> <dbl> <dbl> <int> <int>
## 1 KL102 0.181 0.0872 0.0933 1 2
## 2 KL15 0.0933 0.0543 0.0390 3 4
## 3 KL2 0.120 0.0897 0.0301 2 1
## 4 KL104 0.0341 0.00880 0.0253 6 30
## 5 KL157 0.0154 0.00126 0.0142 14 87
## 6 KL81 0.0256 0.0126 0.0129 9 25
## 7 KL108 0.0219 0.0101 0.0118 11 28
## 8 KL149 0.0267 0.0215 0.00514 8 13
## 9 KL127 0.00586 0.00380 0.00206 34 50
## 10 KL145 0.00321 0.00128 0.00194 38 72
## # ℹ 80 more rows
data_NNS_sites10 %>% filter(K_locus=="KL102") %>% group_by(Cluster, Country, ST) %>% count() %>% arrange(-n) %>% mutate(p=n/data_NNS_sites10 %>% filter(K_locus=="KL102") %>% nrow()) %>% ungroup() %>% mutate(cum=cumsum(p))
## # A tibble: 69 × 6
## Cluster Country ST n p cum
## <chr> <chr> <chr> <int> <dbl> <dbl>
## 1 SZ1 Zambia ST307 203 0.599 0.599
## 2 CHRF187 Bangladesh ST147 21 0.0619 0.661
## 3 SZ3 Zambia ST307 11 0.0324 0.693
## 4 Kilifi141 Kenya ST3247 8 0.0236 0.717
## 5 BG3 South Africa ST307 5 0.0147 0.732
## 6 CHRF120 Bangladesh ST307 5 0.0147 0.746
## 7 MB31 Ghana ST307 5 0.0147 0.761
## 8 BG33 South Africa ST307 4 0.0118 0.773
## 9 ML65 Malawi ST307 4 0.0118 0.785
## 10 SZ30 Zambia ST307 4 0.0118 0.796
## # ℹ 59 more rows
data_NNS_sites10 %>% filter(K_locus=="KL2") %>% group_by(Cluster, Country, ST) %>% count() %>% arrange(-n) %>% mutate(p=n/data_NNS_sites10 %>% filter(K_locus=="KL2") %>% nrow()) %>% ungroup() %>% mutate(cum=cumsum(p))
## # A tibble: 71 × 6
## Cluster Country ST n p cum
## <chr> <chr> <chr> <int> <dbl> <dbl>
## 1 ML2 Malawi ST39 87 0.387 0.387
## 2 ML9 Malawi ST14 23 0.102 0.489
## 3 Kiambu sub-County Hospital30 Kenya ST14 12 0.0533 0.542
## 4 Mbagathi54 Kenya ST14 11 0.0489 0.591
## 5 Kilifi86 Kenya ST14 7 0.0311 0.622
## 6 ML1 Malawi ST14 6 0.0267 0.649
## 7 ML29 Malawi ST25 4 0.0178 0.667
## 8 ML3 Malawi ST25 4 0.0178 0.684
## 9 Kilifi125 Kenya ST101 3 0.0133 0.698
## 10 MB85 Malawi ST25 2 0.00889 0.707
## # ℹ 61 more rows
data_NNS_sites10 %>% filter(K_locus=="KL15") %>% group_by(Cluster, Country, ST) %>% count() %>% arrange(-n) %>% mutate(p=n/data_NNS_sites10 %>% filter(K_locus=="KL15") %>% nrow()) %>% ungroup() %>% mutate(cum=cumsum(p))
## # A tibble: 41 × 6
## Cluster Country ST n p cum
## <chr> <chr> <chr> <int> <dbl> <dbl>
## 1 CHRF149 Bangladesh ST11 66 0.377 0.377
## 2 BA24 Ethiopia ST37 26 0.149 0.526
## 3 CHRF169 Bangladesh ST502 14 0.08 0.606
## 4 NEO2 India ST11 9 0.0514 0.657
## 5 ML94 Malawi ST45 7 0.04 0.697
## 6 aiims10 India ST11 6 0.0343 0.731
## 7 MB49 Zambia ST5856 4 0.0229 0.754
## 8 BA20 Ethiopia ST37 3 0.0171 0.771
## 9 CHRF143 Bangladesh ST273 3 0.0171 0.789
## 10 CHRF186 Bangladesh ST340 2 0.0114 0.8
## # ℹ 31 more rows
data_NNS_sites10 %>% filter(K_locus=="KL104") %>% group_by(Cluster, Country, ST) %>% count() %>% arrange(-n) %>% mutate(p=n/data_NNS_sites10 %>% filter(K_locus=="KL104") %>% nrow()) %>% ungroup() %>% mutate(cum=cumsum(p))
## # A tibble: 8 × 6
## Cluster Country ST n p cum
## <chr> <chr> <chr> <int> <dbl> <dbl>
## 1 MB98 Tanzania ST1741 50 0.781 0.781
## 2 Kilifi117 Kenya ST1741 5 0.0781 0.859
## 3 aiims21 India ST1741 3 0.0469 0.906
## 4 aiims13 India ST1741 2 0.0312 0.938
## 5 MB94 Tanzania ST1741 1 0.0156 0.953
## 6 MB96 Tanzania ST1741 1 0.0156 0.969
## 7 MB98 Tanzania ST1741-1LV 1 0.0156 0.984
## 8 ML46 Malawi ST245-2LV 1 0.0156 1
data_NNS_sites10 %>% filter(K_locus=="KL157") %>% group_by(Cluster, Country, ST) %>% count() %>% arrange(-n) %>% mutate(p=n/data_NNS_sites10 %>% filter(K_locus=="KL157") %>% nrow()) %>% ungroup() %>% mutate(cum=cumsum(p))
## # A tibble: 1 × 6
## Cluster Country ST n p cum
## <chr> <chr> <chr> <int> <dbl> <dbl>
## 1 Kiambu sub-County Hospital42 Kenya ST6775 29 1 1
data_NNS_sites10 %>% filter(K_locus=="KL81") %>% group_by(Cluster, Country, ST) %>% count() %>% arrange(-n) %>% mutate(p=n/data_NNS_sites10 %>% filter(K_locus=="KL81") %>% nrow()) %>% ungroup() %>% mutate(cum=cumsum(p))
## # A tibble: 9 × 6
## Cluster Country ST n p cum
## <chr> <chr> <chr> <int> <dbl> <dbl>
## 1 CHRF182 Bangladesh ST16 28 0.583 0.583
## 2 AKU15 Pakistan ST16 7 0.146 0.729
## 3 CHRF126 Bangladesh ST16 6 0.125 0.854
## 4 AKU69 Pakistan ST16 2 0.0417 0.896
## 5 AKU39 Pakistan ST16 1 0.0208 0.917
## 6 CHRF165 Bangladesh ST16 1 0.0208 0.938
## 7 CHRF167 Bangladesh ST16 1 0.0208 0.958
## 8 CHRF176 Bangladesh ST16 1 0.0208 0.979
## 9 CHRF178 Bangladesh ST16 1 0.0208 1
data_NNS_sites10 %>% filter(K_locus=="KL108") %>% group_by(Cluster, Country, ST) %>% count() %>% arrange(-n) %>% mutate(p=n/data_NNS_sites10 %>% filter(K_locus=="KL108") %>% nrow()) %>% ungroup() %>% mutate(cum=cumsum(p))
## # A tibble: 8 × 6
## Cluster Country ST n p cum
## <chr> <chr> <chr> <int> <dbl> <dbl>
## 1 BA23 Ethiopia ST35 32 0.780 0.780
## 2 BA30 Ethiopia ST35 3 0.0732 0.854
## 3 BA25 Ethiopia ST35 1 0.0244 0.878
## 4 BA26 Ethiopia ST35 1 0.0244 0.902
## 5 BA32 Ethiopia ST35 1 0.0244 0.927
## 6 BA73 Nigeria ST395 1 0.0244 0.951
## 7 ML149 Malawi ST110 1 0.0244 0.976
## 8 WI35 South Africa ST35 1 0.0244 1
data_NNS_sites10 %>% filter(K_locus=="KL149") %>% group_by(Cluster, Country, ST) %>% count() %>% arrange(-n) %>% mutate(p=n/data_NNS_sites10 %>% filter(K_locus=="KL149") %>% nrow()) %>% ungroup() %>% mutate(cum=cumsum(p))
## # A tibble: 17 × 6
## Cluster Country ST n p cum
## <chr> <chr> <chr> <int> <dbl> <dbl>
## 1 BG27 South Africa ST152 16 0.32 0.32
## 2 WI20 South Africa ST39 10 0.2 0.52
## 3 BG77 South Africa ST39 5 0.1 0.62
## 4 CHRF139 Bangladesh ST152 3 0.06 0.68
## 5 BG16 South Africa ST152 2 0.04 0.72
## 6 BG37 South Africa ST152 2 0.04 0.76
## 7 WI43 South Africa ST39 2 0.04 0.8
## 8 BG40 South Africa ST39 1 0.02 0.82
## 9 BG72 South Africa ST39 1 0.02 0.84
## 10 BG84 South Africa ST152 1 0.02 0.86
## 11 BG93 South Africa ST39 1 0.02 0.88
## 12 BG96 South Africa ST39 1 0.02 0.9
## 13 MB70 Ethiopia ST152 1 0.02 0.92
## 14 WI15 South Africa ST152 1 0.02 0.94
## 15 WI17 South Africa ST39 1 0.02 0.96
## 16 WI25 South Africa ST39 1 0.02 0.98
## 17 WI36 South Africa ST152 1 0.02 1
# how do these compare to those added to KL set 2 to get good region coverage? - only KL81
k_rank_region8 %>% left_join(kbayes$locus, by="locus") %>% filter(rank.y>20)
## # A tibble: 5 × 3
## locus rank.x rank.y
## <chr> <int> <int>
## 1 KL28 16 22
## 2 KL81 17 25
## 3 KL8 18 27
## 4 KL116 19 35
## 5 KL53 20 41
Leave One out (LOO)
Sensitivity analysis - O type, Bayesian estimate, adjusted
loo <- obayes$global_est %>% mutate(Study="None")
loo <- read_csv("outputs_LOO/O_Full_min10_adj_28_LOO_AKU_global_estimates.csv") %>%
mutate(Study="AKU") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 15 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_min10_adj_28_LOO_BabyGERMS_global_estimates.csv") %>%
mutate(Study="BabyGERMS") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 15 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_min10_adj_28_LOO_BARNARDS_global_estimates.csv") %>%
mutate(Study="BARNARDS") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 14 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_min10_adj_28_LOO_Botswana_global_estimates.csv") %>%
mutate(Study="Botswana") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 15 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_min10_adj_28_LOO_CHRF_global_estimates.csv") %>%
mutate(Study="CHRF") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 14 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_min10_adj_28_LOO_AIIMS_global_estimates.csv") %>%
mutate(Study="DH") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 15 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_min10_adj_28_LOO_GBS_global_estimates.csv") %>%
mutate(Study="GBSCOP") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 15 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_min10_adj_28_LOO_Kilifi_global_estimates.csv") %>%
mutate(Study="Kilifi") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 14 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_min10_adj_28_LOO_MBIRA_global_estimates.csv") %>%
mutate(Study="MBIRA") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 15 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_min10_adj_28_LOO_MLW_global_estimates.csv") %>%
mutate(Study="MLW") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 14 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_min10_adj_28_LOO_NeoBAC_global_estimates.csv") %>%
mutate(Study="NeoBAC") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 15 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/O_Full_min10_adj_28_LOO_NeoOBS_global_estimates.csv") %>%
mutate(Study="NeoOBS-India") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 15 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo_O <- read_csv("outputs_LOO/O_Full_min10_adj_28_LOO_SPINZ_global_estimates.csv") %>%
mutate(Study="SPINZ") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo) %>%
mutate(locus=replace(locus, locus=="O2a.v1", "O2a")) %>%
mutate(locus=replace(locus, locus=="O2afg.v2", "O2afg"))
## Rows: 15 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Bayes global sensitivity analysis, top20 in any, print rank - O
type
loo_O_plot <- loo_O %>%
filter(rank<=9) %>%
ggplot(aes(y=locus, x=Study, fill=factor(rank))) +
geom_tile() +
geom_text(aes(y=locus, x=Study, label=round(rank)), size=4) +
scale_y_discrete(limits=rev(obayes$locus_rank$locus[1:9])) +
scale_x_discrete(limits=rev(unique(loo_O$Study))) + # keep the order as per tibble, starting with 'None'
#scale_fill_gradient("Global\nprevalence (%)", low = "white", high = "#081d58")
scale_fill_manual(values = c(rev(brewer.pal(5, "Reds")), rev(brewer.pal(6, "Blues")[-1]))) +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10,
colour = "black"),
axis.title = element_text(size = 10, colour = "black"),
axis.text.y = element_text(size = 10, colour = "black"),
plot.title = element_text(hjust=0.5),
legend.position = "right") +
labs(y="", x="Excluded study", title="Sensitivity analysis - O types", fill="Rank")
Sensitivity analysis - K locus, Bayesian estimate, adjusted
loo <- kbayes$global_est %>% mutate(Study="None")
loo <- read_csv("outputs_LOO/K_Full_min10_adj_28_LOO_AKU_global_estimates.csv") %>%
mutate(Study="AKU") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 87 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_min10_adj_28_LOO_BabyGERMS_global_estimates.csv") %>%
mutate(Study="BabyGERMS") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 89 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_min10_adj_28_LOO_BARNARDS_global_estimates.csv") %>%
mutate(Study="BARNARDS") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 85 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_min10_adj_28_LOO_Botswana_global_estimates.csv") %>%
mutate(Study="Botswana") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 90 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_min10_adj_28_LOO_CHRF_global_estimates.csv") %>%
mutate(Study="CHRF") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 84 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_min10_adj_28_LOO_AIIMS_global_estimates.csv") %>%
mutate(Study="DH") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 89 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_min10_adj_28_LOO_GBS_global_estimates.csv") %>%
mutate(Study="GBSCOP") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 90 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_min10_adj_28_LOO_Kilifi_global_estimates.csv") %>%
mutate(Study="Kilifi") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 85 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_min10_adj_28_LOO_MBIRA_global_estimates.csv") %>%
mutate(Study="MBIRA") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 88 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_min10_adj_28_LOO_MLW_global_estimates.csv") %>%
mutate(Study="MLW") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 87 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_min10_adj_28_LOO_NeoBAC_global_estimates.csv") %>%
mutate(Study="NeoBAC") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 89 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_min10_adj_28_LOO_NeoOBS_global_estimates.csv") %>%
mutate(Study="NeoOBS-India") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 90 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
loo <- read_csv("outputs_LOO/K_Full_min10_adj_28_LOO_SPINZ_global_estimates.csv") %>%
mutate(Study="SPINZ") %>%
arrange(-mean) %>%
mutate(rank = min_rank(desc(mean))) %>%
bind_rows(loo)
## Rows: 90 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (4): mean, median, lower, upper
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
sensitivity analysis for K, top20 in any, print rank
top20_sens <- loo %>% filter(rank<=20) %>% pull(locus) %>% unique()
loo_k_ranks <- loo %>%
filter(rank<=20) %>%
ggplot(aes(y=locus, x=Study, fill=factor(rank))) +
geom_tile() +
geom_text(aes(y=locus, x=Study, label=round(rank)), size=4) +
scale_y_discrete(limits=rev(kbayes$locus_rank$locus[kbayes$locus_rank$locus %in% top20_sens])) +
scale_x_discrete(limits=rev(unique(loo$Study))) + # keep the order as per tibble, starting with 'None'
#scale_fill_gradient("Global\nprevalence (%)", low = "white", high = "#081d58")
scale_fill_manual(values = c(rev(brewer.pal(5, "Reds")), rev(brewer.pal(6, "Purples")[-1]), rev(brewer.pal(6, "Blues")[-1]), rev(brewer.pal(5, "Greens")))) +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10,
colour = "black"),
axis.title = element_text(size = 10, colour = "black"),
axis.text.y = element_text(size = 10, colour = "black"),
plot.title = element_text(hjust=0.5),
legend.position = "right") +
labs(y="", x="Excluded study", title="Sensitivity analysis", fill="Rank") +
geom_hline(yintercept=length(top20_sens)+0.5-5) +
geom_hline(yintercept=length(top20_sens)+0.5-10) +
geom_hline(yintercept=length(top20_sens)+0.5-15) +
geom_hline(yintercept=length(top20_sens)+0.5-20)
LOO - K coverage
studies <- unique(data_NNS_sites10$Study)
studies <- studies[-c(8,12)]
studies <- c(studies, c("AIIMS", "NeoOBS"))
loo_k_coverage <- tibble(
rank = integer(0),
mean = double(0),
median = double(0),
lower = double(0),
upper = double(0),
locus = character(0),
subgroup = character(0),
Study = character(0)
)
for (study in studies) {
# read raw Bayesian estimates for K
loo_kbayes_raw <- parseModelledEstimates(global_post=paste0("outputs_LOO/K_Full_min10_raw_28_LOO_",study,"_posterior_global.csv.gz"), region_post=paste0("outputs_LOO/K_Full_min10_raw_28_LOO_",study,"_posterior_subgroup.csv.gz"))
this_coverage <- getCumulativeCoverageGlobalRegional(loo_kbayes_raw, kbayes$locus_rank) %>%
mutate(subgroup=fct_relevel(subgroup,c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa"))) %>%
mutate(Study=study)
loo_k_coverage <- bind_rows(loo_k_coverage, this_coverage)
}
## Rows: 1068000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4272000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1080000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4320000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1044000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4176000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1008000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4032000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1020000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4080000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1080000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4320000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1080000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4320000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1020000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4080000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1068000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4272000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1044000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4176000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1056000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4224000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1068000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4272000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 1080000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4320000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
loo_k_coverage_plot <- loo_k_coverage %>%
mutate(subgroup=fct_relevel(subgroup,rev(c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa")))) %>%
bind_rows(kbayes_raw_coverage %>% mutate(Study="None")) %>%
filter(rank==20) %>%
mutate(Study=if_else(Study=="NeoOBS", "NeoOBS-India", Study)) %>%
mutate(Study=if_else(Study=="AIIMS", "DH", Study)) %>%
mutate(Study=if_else(Study=="GBS", "GBSCOP", Study)) %>%
ggplot(aes(x=Study, y=subgroup, fill=mean)) +
geom_tile() +
geom_text(aes(y=subgroup, x=Study, label=round(100*mean, 2)), size=3) +
scale_x_discrete(limits=rev(unique(loo$Study))) +
scale_fill_gradient(low="white", high="grey") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10,
colour = "black"),
axis.title = element_text(size = 10, colour = "black"),
axis.text.y = element_text(size = 10, colour = "black"),
plot.title = element_text(hjust=0.5),
legend.position = "right") +
labs(y="", x="Excluded study", fill="Coverage")
Appendix Fig S2.1 - LOO K
loo_k_ranks / loo_k_coverage_plot + patchwork::plot_layout(heights=c(4,1), axes="collect")

ggsave(file="figures/AppendixFigS2.1_LOO_BayesGlobal_Klocus.pdf", width=9, height=11)
ggsave(file="figures/AppendixFigS2.1_LOO_BayesGlobal_Klocus.png", width=9, height=11)
loo_o_coverage <- tibble(
rank = integer(0),
mean = double(0),
median = double(0),
lower = double(0),
upper = double(0),
locus = character(0),
subgroup = character(0),
Study = character(0)
)
for (study in studies) {
# read raw Bayesian estimates for K
loo_obayes_raw <- parseModelledEstimates(global_post=paste0("outputs_LOO/O_Full_min10_raw_28_LOO_",study,"_posterior_global.csv.gz"), region_post=paste0("outputs_LOO/O_Full_min10_raw_28_LOO_",study,"_posterior_subgroup.csv.gz"), fixOnames = T)
this_coverage <- getCumulativeCoverageGlobalRegional(loo_obayes_raw, obayes$locus_rank) %>%
mutate(subgroup=fct_relevel(subgroup,c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa"))) %>%
mutate(Study=study)
loo_o_coverage <- bind_rows(loo_o_coverage, this_coverage)
}
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 168000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 672000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 168000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 672000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 168000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 672000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 168000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 672000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
loo_o_coverage_plot <- loo_o_coverage %>%
mutate(subgroup=fct_relevel(subgroup,rev(c("Global", "Southern Asia", "Eastern Africa", "Southern Africa", "Western Africa")))) %>%
bind_rows(obayes_raw_coverage %>% mutate(Study="None")) %>%
filter(rank==5) %>%
mutate(Study=if_else(Study=="NeoOBS", "NeoOBS-India", Study)) %>%
mutate(Study=if_else(Study=="AIIMS", "DH", Study)) %>%
mutate(Study=if_else(Study=="GBS", "GBSCOP", Study)) %>%
ggplot(aes(x=Study, y=subgroup, fill=mean)) +
geom_tile() +
geom_text(aes(y=subgroup, x=Study, label=round(100*mean, 2)), size=3) +
scale_x_discrete(limits=rev(unique(loo$Study))) +
scale_fill_gradient(low="lightgrey", high="darkgrey") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10,
colour = "black"),
axis.title = element_text(size = 10, colour = "black"),
axis.text.y = element_text(size = 10, colour = "black"),
plot.title = element_text(hjust=0.5),
legend.position = "right") +
labs(y="", x="Excluded study", fill="Coverage")
Appendix Fig S2.2
loo_O_plot / loo_o_coverage_plot + plot_layout(heights=c(2,1), axes="collect")

ggsave(file="figures/AppendixFigS2.2_LOO_BayesGlobal_Olocus.pdf", width=9, height=9)
ggsave(file="figures/AppendixFigS2.2_LOO_BayesGlobal_Olocus.png", width=9, height=9)
EFFECT OF TEMPORAL THRESHOLD FOR CLUSTERING ON MODELLED
ESTIMATES
compare global prevalence distributions using counts
cluster-adjusted using 28 vs 365-day threshold
# read Bayesian estimates for K adjusted using 365 days
kbayes_365 <- parseModelledEstimates(global_post="outputs_core/K_Full_min10_adj_365_posterior_global.csv.gz", region_post="outputs_core/K_Full_min10_adj_28_posterior_subgroup.csv.gz")
## Rows: 1092000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4320000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
# plot raw vs adjusted distributions, overlaid - for fig S15
k_dist_28_365 <- comparative_locus_ridgesplot(posterior1=kbayes$global_post,
posterior2=kbayes_365$global_post,
ranks=kbayes$locus_rank,
lines_every=10, xlim=c(0,12), xbreaks=seq(0,12,1),
maxRank=30, type1="28 days", type2="365 days", legend_title="Cluster threshold", scale=2, title="e) Modelled K estimates - posterior")
## Joining with `by = join_by(locus)`
## Picking joint bandwidth of 0.0765
## Warning: Removed 99 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

# read Bayesian estimates for O adjusted using 365 days
obayes_365 <- parseModelledEstimates(global_post="outputs_core/O_Full_min10_adj_365_posterior_global.csv.gz", region_post="outputs_core/O_Full_min10_adj_365_posterior_subgroup.csv.gz", fixOnames = T)
## Rows: 180000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 720000 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): locus, subgroup
## dbl (2): draw_id, prob
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'locus'. You can override using the `.groups` argument.
# plot raw vs adjusted distributions, overlaid - for fig S15
o_dist_28_365 <- comparative_locus_ridgesplot(posterior1=obayes$global_post,
posterior2=obayes_365$global_post,
ranks=obayes$locus_rank,
lines_every=5, xlim=c(0,30), xbreaks=seq(0,30,2),
maxRank=15, type1="28 days", type2="365 days", legend_title="Cluster threshold", scale=2, title="f) Modelled O estimates - posterior")
## Joining with `by = join_by(locus)`
## Picking joint bandwidth of 0.0965
## Warning: Removed 126 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

Scatter plots for K mean and rank
adj_28_vs_365 <- full_join(kbayes$global_est %>% arrange(-mean) %>% ungroup %>% mutate(rank=row_number()),
kbayes_365$global_est %>% arrange(-mean) %>% ungroup() %>% mutate(rank=row_number()),
by="locus", suffix=c(".28", ".365"))
timeThreshold_estimates_adj <- adj_28_vs_365 %>%
ggplot(aes(x=mean.28, y=mean.365)) +
geom_abline(intercept=0, slope=1, col="grey") +
geom_point() + theme_bw() +
labs(x="28-day threshold", y="365-day threshold", title="c) Mean K estimates")
timeThreshold_ranks_adj <- adj_28_vs_365 %>%
ggplot(aes(x=rank.28, y=rank.365)) +
geom_abline(intercept=0, slope=1, col="grey") +
geom_point() + theme_bw() +
labs(x="28-day threshold", y="365-day threshold", title="a) Modelled K ranks")
timeThreshold_estimates_adj + timeThreshold_ranks_adj
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

Scatter plots for O mean and rank
adj_28_vs_365_o <- full_join(obayes$global_est %>% arrange(-mean) %>% ungroup %>% mutate(rank=row_number()),
obayes_365$global_est %>% arrange(-mean) %>% ungroup() %>% mutate(rank=row_number()),
by="locus", suffix=c(".28", ".365"))
timeThreshold_estimates_adj_o <- adj_28_vs_365_o %>%
ggplot(aes(x=mean.28, y=mean.365)) +
geom_abline(intercept=0, slope=1, col="grey") +
geom_point() + theme_bw() +
labs(x="28-day threshold", y="365-day threshold", title="d) Mean O estimates")
timeThreshold_ranks_adj_o <- adj_28_vs_365_o %>%
ggplot(aes(x=rank.28, y=rank.365)) +
geom_abline(intercept=0, slope=1, col="grey") +
geom_point() + theme_bw() +
labs(x="28-day threshold", y="365-day threshold", title="b) Modelled O ranks")
timeThreshold_estimates_adj_o + timeThreshold_ranks_adj_o

Appendix Fig S2.5
(timeThreshold_ranks_adj + timeThreshold_ranks_adj_o) / (timeThreshold_estimates_adj + timeThreshold_estimates_adj_o) / (k_dist_28_365 + o_dist_28_365 + patchwork::plot_layout(guides="collect")) + patchwork::plot_layout(heights=c(1,1,2)) & theme(legend.position='bottom')
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Picking joint bandwidth of 0.0765
## Warning: Removed 99 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Picking joint bandwidth of 0.0965
## Warning: Removed 126 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).

ggsave("figures/AppendixFigS2.5_temporalThresholdModelled.pdf", width=8, height=10)
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Picking joint bandwidth of 0.0765
## Warning: Removed 99 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Picking joint bandwidth of 0.0965
## Warning: Removed 126 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
ggsave("figures/AppendixFigS2.5_temporalThresholdModelled.png", width=8, height=10)
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Picking joint bandwidth of 0.0765
## Warning: Removed 99 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).
## Picking joint bandwidth of 0.0965
## Warning: Removed 126 rows containing non-finite outside the scale range
## (`stat_density_ridges()`).